diff --git a/.ci/docker-compose-file/cassandra/Dockerfile b/.ci/docker-compose-file/cassandra/Dockerfile deleted file mode 100644 index f974c1b6f..000000000 --- a/.ci/docker-compose-file/cassandra/Dockerfile +++ /dev/null @@ -1,4 +0,0 @@ -ARG CASSANDRA_TAG=3.11.6 -FROM cassandra:${CASSANDRA_TAG} -COPY cassandra.yaml /etc/cassandra/cassandra.yaml -CMD ["cassandra", "-f"] diff --git a/.ci/docker-compose-file/cassandra/cassandra_noauth.yaml b/.ci/docker-compose-file/cassandra/cassandra_noauth.yaml new file mode 100644 index 000000000..eff87061d --- /dev/null +++ b/.ci/docker-compose-file/cassandra/cassandra_noauth.yaml @@ -0,0 +1,1236 @@ +# Cassandra storage config YAML + +# NOTE: +# See http://wiki.apache.org/cassandra/StorageConfiguration for +# full explanations of configuration directives +# /NOTE + +# The name of the cluster. This is mainly used to prevent machines in +# one logical cluster from joining another. +cluster_name: 'Test Cluster' + +# This defines the number of tokens randomly assigned to this node on the ring +# The more tokens, relative to other nodes, the larger the proportion of data +# that this node will store. You probably want all nodes to have the same number +# of tokens assuming they have equal hardware capability. +# +# If you leave this unspecified, Cassandra will use the default of 1 token for legacy compatibility, +# and will use the initial_token as described below. +# +# Specifying initial_token will override this setting on the node's initial start, +# on subsequent starts, this setting will apply even if initial token is set. +# +# If you already have a cluster with 1 token per node, and wish to migrate to +# multiple tokens per node, see http://wiki.apache.org/cassandra/Operations +num_tokens: 256 + +# Triggers automatic allocation of num_tokens tokens for this node. The allocation +# algorithm attempts to choose tokens in a way that optimizes replicated load over +# the nodes in the datacenter for the replication strategy used by the specified +# keyspace. +# +# The load assigned to each node will be close to proportional to its number of +# vnodes. +# +# Only supported with the Murmur3Partitioner. +# allocate_tokens_for_keyspace: KEYSPACE + +# initial_token allows you to specify tokens manually. While you can use it with +# vnodes (num_tokens > 1, above) -- in which case you should provide a +# comma-separated list -- it's primarily used when adding nodes to legacy clusters +# that do not have vnodes enabled. +# initial_token: + +# See http://wiki.apache.org/cassandra/HintedHandoff +# May either be "true" or "false" to enable globally +hinted_handoff_enabled: true + +# When hinted_handoff_enabled is true, a black list of data centers that will not +# perform hinted handoff +# hinted_handoff_disabled_datacenters: +# - DC1 +# - DC2 + +# this defines the maximum amount of time a dead host will have hints +# generated. After it has been dead this long, new hints for it will not be +# created until it has been seen alive and gone down again. +max_hint_window_in_ms: 10800000 # 3 hours + +# Maximum throttle in KBs per second, per delivery thread. This will be +# reduced proportionally to the number of nodes in the cluster. (If there +# are two nodes in the cluster, each delivery thread will use the maximum +# rate; if there are three, each will throttle to half of the maximum, +# since we expect two nodes to be delivering hints simultaneously.) 
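+# (Editor's note, illustrative arithmetic only, not part of the upstream
+# defaults: with the 1024 KB/s value below, a two-node cluster delivers
+# hints at the full 1024 KB/s per delivery thread, while a three-node
+# cluster throttles each thread to roughly 512 KB/s, as described above.)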
+hinted_handoff_throttle_in_kb: 1024 + +# Number of threads with which to deliver hints; +# Consider increasing this number when you have multi-dc deployments, since +# cross-dc handoff tends to be slower +max_hints_delivery_threads: 2 + +# Directory where Cassandra should store hints. +# If not set, the default directory is $CASSANDRA_HOME/data/hints. +# hints_directory: /var/lib/cassandra/hints + +# How often hints should be flushed from the internal buffers to disk. +# Will *not* trigger fsync. +hints_flush_period_in_ms: 10000 + +# Maximum size for a single hints file, in megabytes. +max_hints_file_size_in_mb: 128 + +# Compression to apply to the hint files. If omitted, hints files +# will be written uncompressed. LZ4, Snappy, and Deflate compressors +# are supported. +#hints_compression: +# - class_name: LZ4Compressor +# parameters: +# - + +# Maximum throttle in KBs per second, total. This will be +# reduced proportionally to the number of nodes in the cluster. +batchlog_replay_throttle_in_kb: 1024 + +# Authentication backend, implementing IAuthenticator; used to identify users +# Out of the box, Cassandra provides org.apache.cassandra.auth.{AllowAllAuthenticator, +# PasswordAuthenticator}. +# +# - AllowAllAuthenticator performs no checks - set it to disable authentication. +# - PasswordAuthenticator relies on username/password pairs to authenticate +# users. It keeps usernames and hashed passwords in system_auth.roles table. +# Please increase system_auth keyspace replication factor if you use this authenticator. +# If using PasswordAuthenticator, CassandraRoleManager must also be used (see below) +authenticator: AllowAllAuthenticator + +# Authorization backend, implementing IAuthorizer; used to limit access/provide permissions +# Out of the box, Cassandra provides org.apache.cassandra.auth.{AllowAllAuthorizer, +# CassandraAuthorizer}. +# +# - AllowAllAuthorizer allows any action to any user - set it to disable authorization. +# - CassandraAuthorizer stores permissions in system_auth.role_permissions table. Please +# increase system_auth keyspace replication factor if you use this authorizer. +authorizer: AllowAllAuthorizer + +# Part of the Authentication & Authorization backend, implementing IRoleManager; used +# to maintain grants and memberships between roles. +# Out of the box, Cassandra provides org.apache.cassandra.auth.CassandraRoleManager, +# which stores role information in the system_auth keyspace. Most functions of the +# IRoleManager require an authenticated login, so unless the configured IAuthenticator +# actually implements authentication, most of this functionality will be unavailable. +# +# - CassandraRoleManager stores role data in the system_auth keyspace. Please +# increase system_auth keyspace replication factor if you use this role manager. +role_manager: CassandraRoleManager + +# Validity period for roles cache (fetching granted roles can be an expensive +# operation depending on the role manager, CassandraRoleManager is one example) +# Granted roles are cached for authenticated sessions in AuthenticatedUser and +# after the period specified here, become eligible for (async) reload. +# Defaults to 2000, set to 0 to disable caching entirely. +# Will be disabled automatically for AllowAllAuthenticator. +roles_validity_in_ms: 2000 + +# Refresh interval for roles cache (if enabled). +# After this interval, cache entries become eligible for refresh. Upon next +# access, an async reload is scheduled and the old value returned until it +# completes. 
If roles_validity_in_ms is non-zero, then this must be +# also. +# Defaults to the same value as roles_validity_in_ms. +# roles_update_interval_in_ms: 2000 + +# Validity period for permissions cache (fetching permissions can be an +# expensive operation depending on the authorizer, CassandraAuthorizer is +# one example). Defaults to 2000, set to 0 to disable. +# Will be disabled automatically for AllowAllAuthorizer. +permissions_validity_in_ms: 2000 + +# Refresh interval for permissions cache (if enabled). +# After this interval, cache entries become eligible for refresh. Upon next +# access, an async reload is scheduled and the old value returned until it +# completes. If permissions_validity_in_ms is non-zero, then this must be +# also. +# Defaults to the same value as permissions_validity_in_ms. +# permissions_update_interval_in_ms: 2000 + +# Validity period for credentials cache. This cache is tightly coupled to +# the provided PasswordAuthenticator implementation of IAuthenticator. If +# another IAuthenticator implementation is configured, this cache will not +# be automatically used and so the following settings will have no effect. +# Please note, credentials are cached in their encrypted form, so while +# activating this cache may reduce the number of queries made to the +# underlying table, it may not bring a significant reduction in the +# latency of individual authentication attempts. +# Defaults to 2000, set to 0 to disable credentials caching. +credentials_validity_in_ms: 2000 + +# Refresh interval for credentials cache (if enabled). +# After this interval, cache entries become eligible for refresh. Upon next +# access, an async reload is scheduled and the old value returned until it +# completes. If credentials_validity_in_ms is non-zero, then this must be +# also. +# Defaults to the same value as credentials_validity_in_ms. +# credentials_update_interval_in_ms: 2000 + +# The partitioner is responsible for distributing groups of rows (by +# partition key) across nodes in the cluster. You should leave this +# alone for new clusters. The partitioner can NOT be changed without +# reloading all data, so when upgrading you should set this to the +# same partitioner you were already using. +# +# Besides Murmur3Partitioner, partitioners included for backwards +# compatibility include RandomPartitioner, ByteOrderedPartitioner, and +# OrderPreservingPartitioner. +# +partitioner: org.apache.cassandra.dht.Murmur3Partitioner + +# Directories where Cassandra should store data on disk. Cassandra +# will spread data evenly across them, subject to the granularity of +# the configured compaction strategy. +# If not set, the default directory is $CASSANDRA_HOME/data/data. +data_file_directories: + - /var/lib/cassandra/data + +# commit log. when running on magnetic HDD, this should be a +# separate spindle than the data directories. +# If not set, the default directory is $CASSANDRA_HOME/data/commitlog. +commitlog_directory: /var/lib/cassandra/commitlog + +# Enable / disable CDC functionality on a per-node basis. This modifies the logic used +# for write path allocation rejection (standard: never reject. cdc: reject Mutation +# containing a CDC-enabled table if at space limit in cdc_raw_directory). +cdc_enabled: false + +# CommitLogSegments are moved to this directory on flush if cdc_enabled: true and the +# segment contains mutations for a CDC-enabled table. This should be placed on a +# separate spindle than the data directories. 
+# If not set, the default directory is
+# $CASSANDRA_HOME/data/cdc_raw.
+# cdc_raw_directory: /var/lib/cassandra/cdc_raw
+
+# Policy for data disk failures:
+#
+# die
+#   shut down gossip and client transports and kill the JVM for any fs errors or
+#   single-sstable errors, so the node can be replaced.
+#
+# stop_paranoid
+#   shut down gossip and client transports even for single-sstable errors,
+#   kill the JVM for errors during startup.
+#
+# stop
+#   shut down gossip and client transports, leaving the node effectively dead, but
+#   can still be inspected via JMX, kill the JVM for errors during startup.
+#
+# best_effort
+#   stop using the failed disk and respond to requests based on
+#   remaining available sstables. This means you WILL see obsolete
+#   data at CL.ONE!
+#
+# ignore
+#   ignore fatal errors and let requests fail, as in pre-1.2 Cassandra
+disk_failure_policy: stop
+
+# Policy for commit disk failures:
+#
+# die
+#   shut down gossip and Thrift and kill the JVM, so the node can be replaced.
+#
+# stop
+#   shut down gossip and Thrift, leaving the node effectively dead, but
+#   can still be inspected via JMX.
+#
+# stop_commit
+#   shutdown the commit log, letting writes collect but
+#   continuing to service reads, as in pre-2.0.5 Cassandra
+#
+# ignore
+#   ignore fatal errors and let the batches fail
+commit_failure_policy: stop
+
+# Maximum size of the native protocol prepared statement cache
+#
+# Valid values are either "auto" (omitting the value) or a value greater than 0.
+#
+# Note that specifying too large a value will result in long running GCs and possibly
+# out-of-memory errors. Keep the value at a small fraction of the heap.
+#
+# If you constantly see "prepared statements discarded in the last minute because
+# cache limit reached" messages, the first step is to investigate the root cause
+# of these messages and check whether prepared statements are used correctly -
+# i.e. use bind markers for variable parts.
+#
+# Only change the default value if you really have more prepared statements than
+# fit in the cache. In most cases it is not necessary to change this value.
+# Constantly re-preparing statements is a performance penalty.
+#
+# Default value ("auto") is 1/256th of the heap or 10MB, whichever is greater
+prepared_statements_cache_size_mb:
+
+# Maximum size of the Thrift prepared statement cache
+#
+# If you do not use Thrift at all, it is safe to leave this value at "auto".
+#
+# See description of 'prepared_statements_cache_size_mb' above for more information.
+#
+# Default value ("auto") is 1/256th of the heap or 10MB, whichever is greater
+thrift_prepared_statements_cache_size_mb:
+
+# Maximum size of the key cache in memory.
+#
+# Each key cache hit saves 1 seek and each row cache hit saves 2 seeks at the
+# minimum, sometimes more. The key cache is fairly tiny for the amount of
+# time it saves, so it's worthwhile to use it at large numbers.
+# The row cache saves even more time, but must contain the entire row,
+# so it is extremely space-intensive. It's best to only use the
+# row cache if you have hot rows or static rows.
+#
+# NOTE: if you reduce the size, you may not get your hottest keys loaded on startup.
+#
+# Default value is empty to make it "auto" (min(5% of Heap (in MB), 100MB)). Set to 0 to disable key cache.
+key_cache_size_in_mb:
+
+# Duration in seconds after which Cassandra should
+# save the key cache. Caches are saved to saved_caches_directory as
+# specified in this configuration file.
+#
+# Saved caches greatly improve cold-start speeds, and are relatively cheap in
+# terms of I/O for the key cache. Row cache saving is much more expensive and
+# has limited use.
+#
+# Default is 14400 or 4 hours.
+key_cache_save_period: 14400
+
+# Number of keys from the key cache to save
+# Disabled by default, meaning all keys are going to be saved
+# key_cache_keys_to_save: 100
+
+# Row cache implementation class name. Available implementations:
+#
+# org.apache.cassandra.cache.OHCProvider
+#   Fully off-heap row cache implementation (default).
+#
+# org.apache.cassandra.cache.SerializingCacheProvider
+#   This is the row cache implementation available
+#   in previous releases of Cassandra.
+# row_cache_class_name: org.apache.cassandra.cache.OHCProvider
+
+# Maximum size of the row cache in memory.
+# Please note that OHC cache implementation requires some additional off-heap memory to manage
+# the map structures and some in-flight memory during operations before/after cache entries can be
+# accounted against the cache capacity. This overhead is usually small compared to the whole capacity.
+# Do not specify more memory than the system can afford in the worst usual situation and leave some
+# headroom for OS block level cache. Never allow your system to swap.
+#
+# Default value is 0, to disable row caching.
+row_cache_size_in_mb: 0
+
+# Duration in seconds after which Cassandra should save the row cache.
+# Caches are saved to saved_caches_directory as specified in this configuration file.
+#
+# Saved caches greatly improve cold-start speeds, and are relatively cheap in
+# terms of I/O for the key cache. Row cache saving is much more expensive and
+# has limited use.
+#
+# Default is 0 to disable saving the row cache.
+row_cache_save_period: 0
+
+# Number of keys from the row cache to save.
+# Specify 0 (which is the default), meaning all keys are going to be saved
+# row_cache_keys_to_save: 100
+
+# Maximum size of the counter cache in memory.
+#
+# Counter cache helps to reduce counter locks' contention for hot counter cells.
+# In case of RF = 1 a counter cache hit will cause Cassandra to skip the read before
+# write entirely. With RF > 1 a counter cache hit will still help to reduce the duration
+# of the lock hold, helping with hot counter cell updates, but will not allow skipping
+# the read entirely. Only the local (clock, count) tuple of a counter cell is kept
+# in memory, not the whole counter, so it's relatively cheap.
+#
+# NOTE: if you reduce the size, you may not get your hottest keys loaded on startup.
+#
+# Default value is empty to make it "auto" (min(2.5% of Heap (in MB), 50MB)). Set to 0 to disable counter cache.
+# NOTE: if you perform counter deletes and rely on low gcgs, you should disable the counter cache.
+counter_cache_size_in_mb:
+
+# Duration in seconds after which Cassandra should
+# save the counter cache (keys only). Caches are saved to saved_caches_directory as
+# specified in this configuration file.
+#
+# Default is 7200 or 2 hours.
+counter_cache_save_period: 7200
+
+# Number of keys from the counter cache to save
+# Disabled by default, meaning all keys are going to be saved
+# counter_cache_keys_to_save: 100
+
+# saved caches
+# If not set, the default directory is $CASSANDRA_HOME/data/saved_caches.
+saved_caches_directory: /var/lib/cassandra/saved_caches
+
+# commitlog_sync may be either "periodic" or "batch."
+#
+# When in batch mode, Cassandra won't ack writes until the commit log
+# has been fsynced to disk. It will wait
+# commitlog_sync_batch_window_in_ms milliseconds between fsyncs.
+# This window should be kept short because the writer threads will
+# be unable to do extra work while waiting. (You may need to increase
+# concurrent_writes for the same reason.)
+#
+# commitlog_sync: batch
+# commitlog_sync_batch_window_in_ms: 2
+#
+# the other option is "periodic" where writes may be acked immediately
+# and the CommitLog is simply synced every commitlog_sync_period_in_ms
+# milliseconds.
+commitlog_sync: periodic
+commitlog_sync_period_in_ms: 10000
+
+# The size of the individual commitlog file segments. A commitlog
+# segment may be archived, deleted, or recycled once all the data
+# in it (potentially from each columnfamily in the system) has been
+# flushed to sstables.
+#
+# The default size is 32, which is almost always fine, but if you are
+# archiving commitlog segments (see commitlog_archiving.properties),
+# then you probably want a finer granularity of archiving; 8 or 16 MB
+# is reasonable.
+# Max mutation size is also configurable via max_mutation_size_in_kb setting in
+# cassandra.yaml. The default is half the size of commitlog_segment_size_in_mb * 1024.
+# This should be positive and less than 2048.
+#
+# NOTE: If max_mutation_size_in_kb is set explicitly then commitlog_segment_size_in_mb must
+# be set to at least twice the size of max_mutation_size_in_kb / 1024
+#
+commitlog_segment_size_in_mb: 32
+
+# Compression to apply to the commit log. If omitted, the commit log
+# will be written uncompressed. LZ4, Snappy, and Deflate compressors
+# are supported.
+# commitlog_compression:
+#   - class_name: LZ4Compressor
+#     parameters:
+#         -
+
+# any class that implements the SeedProvider interface and has a
+# constructor that takes a Map<String, String> of parameters will do.
+seed_provider:
+    # Addresses of hosts that are deemed contact points.
+    # Cassandra nodes use this list of hosts to find each other and learn
+    # the topology of the ring. You must change this if you are running
+    # multiple nodes!
+    - class_name: org.apache.cassandra.locator.SimpleSeedProvider
+      parameters:
+          # seeds is actually a comma-delimited list of addresses.
+          # Ex: "<ip1>,<ip2>,<ip3>"
+          - seeds: "127.0.0.1"
+
+# For workloads with more data than can fit in memory, Cassandra's
+# bottleneck will be reads that need to fetch data from
+# disk. "concurrent_reads" should be set to (16 * number_of_drives) in
+# order to allow the operations to enqueue low enough in the stack
+# that the OS and drives can reorder them. Same applies to
+# "concurrent_counter_writes", since counter writes read the current
+# values before incrementing and writing them back.
+#
+# On the other hand, since writes are almost never IO bound, the ideal
+# number of "concurrent_writes" is dependent on the number of cores in
+# your system; (8 * number_of_cores) is a good rule of thumb.
+concurrent_reads: 32
+concurrent_writes: 32
+concurrent_counter_writes: 32
+
+# For materialized view writes, as there is a read involved, this should
+# be limited by the lesser of concurrent reads or concurrent writes.
+concurrent_materialized_view_writes: 32
+
+# Maximum memory to use for sstable chunk cache and buffer pooling.
+# 32MB of this are reserved for pooling buffers, the rest is used as a
+# cache that holds uncompressed sstable chunks.
+# Defaults to the smaller of 1/4 of heap or 512MB. This pool is allocated off-heap,
+# so is in addition to the memory allocated for heap. The cache also has on-heap
+# overhead which is roughly 128 bytes per chunk (i.e. 0.2% of the reserved size
+# if the default 64k chunk size is used).
+# Memory is only allocated when needed.
+# file_cache_size_in_mb: 512
+
+# Flag indicating whether to allocate on or off heap when the sstable buffer
+# pool is exhausted, that is when it has exceeded the maximum memory
+# file_cache_size_in_mb, beyond which it will not cache buffers but allocate on request.
+
+# buffer_pool_use_heap_if_exhausted: true
+
+# The strategy for optimizing disk read
+# Possible values are:
+#   ssd (for solid state disks, the default)
+#   spinning (for spinning disks)
+# disk_optimization_strategy: ssd
+
+# Total permitted memory to use for memtables. Cassandra will stop
+# accepting writes when the limit is exceeded until a flush completes,
+# and will trigger a flush based on memtable_cleanup_threshold
+# If omitted, Cassandra will set both to 1/4 the size of the heap.
+memtable_heap_space_in_mb: 2048
+memtable_offheap_space_in_mb: 2048
+
+# memtable_cleanup_threshold is deprecated. The default calculation
+# is the only reasonable choice. See the comments on memtable_flush_writers
+# for more information.
+#
+# Ratio of occupied non-flushing memtable size to total permitted size
+# that will trigger a flush of the largest memtable. Larger mct will
+# mean larger flushes and hence less compaction, but also less concurrent
+# flush activity which can make it difficult to keep your disks fed
+# under heavy write load.
+#
+# memtable_cleanup_threshold defaults to 1 / (memtable_flush_writers + 1)
+# memtable_cleanup_threshold: 0.11
+
+# Specify the way Cassandra allocates and manages memtable memory.
+# Options are:
+#
+# heap_buffers
+#   on heap nio buffers
+#
+# offheap_buffers
+#   off heap (direct) nio buffers
+#
+# offheap_objects
+#   off heap objects
+memtable_allocation_type: heap_buffers
+
+# Total space to use for commit logs on disk.
+#
+# If space gets above this value, Cassandra will flush every dirty CF
+# in the oldest segment and remove it. So a small total commitlog space
+# will tend to cause more flush activity on less-active columnfamilies.
+#
+# The default value is the smaller of 8192, and 1/4 of the total space
+# of the commitlog volume.
+#
+# commitlog_total_space_in_mb: 8192
+
+# This sets the number of memtable flush writer threads per disk
+# as well as the total number of memtables that can be flushed concurrently.
+# These are generally a combination of compute and IO bound.
+#
+# Memtable flushing is more CPU efficient than memtable ingest and a single thread
+# can keep up with the ingest rate of a whole server on a single fast disk
+# until it temporarily becomes IO bound under contention typically with compaction.
+# At that point you need multiple flush threads. At some point in the future
+# it may become CPU bound all the time.
+#
+# You can tell if flushing is falling behind using the MemtablePool.BlockedOnAllocation
+# metric which should be 0, but will be non-zero if threads are blocked waiting on flushing
+# to free memory.
+#
+# memtable_flush_writers defaults to two for a single data directory.
+# This means that two memtables can be flushed concurrently to the single data directory.
+# If you have multiple data directories the default is one memtable flushing at a time
+# but the flush will use a thread per data directory so you will get two or more writers.
+#
+# Two is generally enough to flush on a fast disk [array] mounted as a single data directory.
+# Adding more flush writers will result in smaller, more frequent flushes that introduce more
+# compaction overhead.
+#
+# There is a direct tradeoff between number of memtables that can be flushed concurrently
+# and flush size and frequency. More is not better; you just need enough flush writers
+# to never stall waiting for flushing to free memory.
+#
+#memtable_flush_writers: 2
+
+# Total space to use for change-data-capture logs on disk.
+#
+# If space gets above this value, Cassandra will throw WriteTimeoutException
+# on Mutations including tables with CDC enabled. A CDCCompactor is responsible
+# for parsing the raw CDC logs and deleting them when parsing is completed.
+#
+# The default value is the min of 4096 mb and 1/8th of the total space
+# of the drive where cdc_raw_directory resides.
+# cdc_total_space_in_mb: 4096
+
+# When we hit our cdc_raw limit and the CDCCompactor is either running behind
+# or experiencing backpressure, we check at the following interval to see if any
+# new space for cdc-tracked tables has been made available. Defaults to 250ms
+# cdc_free_space_check_interval_ms: 250
+
+# A fixed memory pool size in MB for SSTable index summaries. If left
+# empty, this will default to 5% of the heap size. If the memory usage of
+# all index summaries exceeds this limit, SSTables with low read rates will
+# shrink their index summaries in order to meet this limit. However, this
+# is a best-effort process. In extreme conditions Cassandra may need to use
+# more than this amount of memory.
+index_summary_capacity_in_mb:
+
+# How frequently index summaries should be resampled. This is done
+# periodically to redistribute memory from the fixed-size pool to sstables
+# proportional to their recent read rates. Setting to -1 will disable this
+# process, leaving existing index summaries at their current sampling level.
+index_summary_resize_interval_in_minutes: 60
+
+# Whether to, when doing sequential writing, fsync() at intervals in
+# order to force the operating system to flush the dirty
+# buffers. Enable this to avoid sudden dirty buffer flushing from
+# impacting read latencies. Almost always a good idea on SSDs; not
+# necessarily on platters.
+trickle_fsync: false
+trickle_fsync_interval_in_kb: 10240
+
+# TCP port, for commands and data
+# For security reasons, you should not expose this port to the internet. Firewall it if needed.
+storage_port: 7000
+
+# SSL port, for encrypted communication. Unused unless enabled in
+# encryption_options
+# For security reasons, you should not expose this port to the internet. Firewall it if needed.
+ssl_storage_port: 7001
+
+# Address or interface to bind to and tell other Cassandra nodes to connect to.
+# You _must_ change this if you want multiple nodes to be able to communicate!
+#
+# Set listen_address OR listen_interface, not both.
+#
+# Leaving it blank leaves it up to InetAddress.getLocalHost(). This
+# will always do the Right Thing _if_ the node is properly configured
+# (hostname, name resolution, etc), and the Right Thing is to use the
+# address associated with the hostname (it might not be).
+#
+# Setting listen_address to 0.0.0.0 is always wrong.
+#
+listen_address: localhost
+
+# Set listen_address OR listen_interface, not both. Interfaces must correspond
+# to a single address, IP aliasing is not supported.
+# listen_interface: eth0
+
+# If you choose to specify the interface by name and the interface has an ipv4 and an ipv6 address
+# you can specify which should be chosen using listen_interface_prefer_ipv6. If false the first ipv4
+# address will be used. If true the first ipv6 address will be used. Defaults to false preferring
+# ipv4. If there is only one address it will be selected regardless of ipv4/ipv6.
+# listen_interface_prefer_ipv6: false
+
+# Address to broadcast to other Cassandra nodes
+# Leaving this blank will set it to the same value as listen_address
+# broadcast_address: 1.2.3.4
+
+# When using multiple physical network interfaces, set this
+# to true to listen on broadcast_address in addition to
+# the listen_address, allowing nodes to communicate on both
+# interfaces.
+# Ignore this property if the network configuration automatically
+# routes between the public and private networks such as EC2.
+# listen_on_broadcast_address: false
+
+# Internode authentication backend, implementing IInternodeAuthenticator;
+# used to allow/disallow connections from peer nodes.
+# internode_authenticator: org.apache.cassandra.auth.AllowAllInternodeAuthenticator
+
+# Whether to start the native transport server.
+# Please note that the address on which the native transport is bound is the
+# same as the rpc_address. The port however is different and specified below.
+start_native_transport: true
+# port for the CQL native transport to listen for clients on
+# For security reasons, you should not expose this port to the internet. Firewall it if needed.
+native_transport_port: 9042
+# Enabling native transport encryption in client_encryption_options allows you to either use
+# encryption for the standard port or to use a dedicated, additional port along with the unencrypted
+# standard native_transport_port.
+# Enabling client encryption and keeping native_transport_port_ssl disabled will use encryption
+# for native_transport_port. Setting native_transport_port_ssl to a different value
+# from native_transport_port will use encryption for native_transport_port_ssl while
+# keeping native_transport_port unencrypted.
+native_transport_port_ssl: 9142
+# The maximum threads for handling requests when the native transport is used.
+# This is similar to rpc_max_threads though the default differs slightly (and
+# there is no native_transport_min_threads, idle threads will always be stopped
+# after 30 seconds).
+# native_transport_max_threads: 128
+#
+# The maximum size of allowed frame. Frame (requests) larger than this will
+# be rejected as invalid. The default is 256MB. If you're changing this parameter,
+# you may want to adjust max_value_size_in_mb accordingly. This should be positive and less than 2048.
+# native_transport_max_frame_size_in_mb: 256
+
+# The maximum number of concurrent client connections.
+# The default is -1, which means unlimited.
+# native_transport_max_concurrent_connections: -1
+
+# The maximum number of concurrent client connections per source ip.
+# The default is -1, which means unlimited.
+# native_transport_max_concurrent_connections_per_ip: -1
+
+# Whether to start the thrift rpc server.
+start_rpc: true
+
+# The address or interface to bind the Thrift RPC service and native transport
+# server to.
+#
+# Set rpc_address OR rpc_interface, not both.
+#
+# Leaving rpc_address blank has the same effect as on listen_address
+# (i.e. it will be based on the configured hostname of the node).
+#
+# Note that unlike listen_address, you can specify 0.0.0.0, but you must also
+# set broadcast_rpc_address to a value other than 0.0.0.0.
+#
+# For security reasons, you should not expose this port to the internet. Firewall it if needed.
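+# (Editor's note, illustrative only: since this file sets
+# authenticator: AllowAllAuthenticator and binds rpc_address to 0.0.0.0
+# below, a client on the same compose network can connect with no
+# credentials, e.g.:
+#   /opt/cassandra/bin/cqlsh cassandra_noauth 9042 -e "DESCRIBE KEYSPACES"
+# where cassandra_noauth is the container name given in
+# docker-compose-cassandra.yaml further down in this change.)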
+rpc_address: 0.0.0.0 + +# Set rpc_address OR rpc_interface, not both. Interfaces must correspond +# to a single address, IP aliasing is not supported. +# rpc_interface: eth1 + +# If you choose to specify the interface by name and the interface has an ipv4 and an ipv6 address +# you can specify which should be chosen using rpc_interface_prefer_ipv6. If false the first ipv4 +# address will be used. If true the first ipv6 address will be used. Defaults to false preferring +# ipv4. If there is only one address it will be selected regardless of ipv4/ipv6. +# rpc_interface_prefer_ipv6: false + +# port for Thrift to listen for clients on +rpc_port: 9160 + +# RPC address to broadcast to drivers and other Cassandra nodes. This cannot +# be set to 0.0.0.0. If left blank, this will be set to the value of +# rpc_address. If rpc_address is set to 0.0.0.0, broadcast_rpc_address must +# be set. +broadcast_rpc_address: 1.2.3.4 + +# enable or disable keepalive on rpc/native connections +rpc_keepalive: true + +# Cassandra provides two out-of-the-box options for the RPC Server: +# +# sync +# One thread per thrift connection. For a very large number of clients, memory +# will be your limiting factor. On a 64 bit JVM, 180KB is the minimum stack size +# per thread, and that will correspond to your use of virtual memory (but physical memory +# may be limited depending on use of stack space). +# +# hsha +# Stands for "half synchronous, half asynchronous." All thrift clients are handled +# asynchronously using a small number of threads that does not vary with the amount +# of thrift clients (and thus scales well to many clients). The rpc requests are still +# synchronous (one thread per active request). If hsha is selected then it is essential +# that rpc_max_threads is changed from the default value of unlimited. +# +# The default is sync because on Windows hsha is about 30% slower. On Linux, +# sync/hsha performance is about the same, with hsha of course using less memory. +# +# Alternatively, can provide your own RPC server by providing the fully-qualified class name +# of an o.a.c.t.TServerFactory that can create an instance of it. +rpc_server_type: sync + +# Uncomment rpc_min|max_thread to set request pool size limits. +# +# Regardless of your choice of RPC server (see above), the number of maximum requests in the +# RPC thread pool dictates how many concurrent requests are possible (but if you are using the sync +# RPC server, it also dictates the number of clients that can be connected at all). +# +# The default is unlimited and thus provides no protection against clients overwhelming the server. You are +# encouraged to set a maximum that makes sense for you in production, but do keep in mind that +# rpc_max_threads represents the maximum number of client requests this server may execute concurrently. 
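+# (Editor's note, illustrative arithmetic only: with the sync server and
+# the example rpc_max_threads: 2048 below, at most 2048 clients can be
+# connected at once, and at the 180KB minimum stack size quoted above
+# that alone reserves roughly 360MB of virtual memory for thread stacks.)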
+#
+# rpc_min_threads: 16
+# rpc_max_threads: 2048
+
+# uncomment to set socket buffer sizes on rpc connections
+# rpc_send_buff_size_in_bytes:
+# rpc_recv_buff_size_in_bytes:
+
+# Uncomment to set socket buffer size for internode communication
+# Note that when setting this, the buffer size is limited by net.core.wmem_max
+# and when not setting it it is defined by net.ipv4.tcp_wmem
+# See also:
+# /proc/sys/net/core/wmem_max
+# /proc/sys/net/core/rmem_max
+# /proc/sys/net/ipv4/tcp_wmem
+# /proc/sys/net/ipv4/tcp_rmem
+# and 'man tcp'
+# internode_send_buff_size_in_bytes:
+
+# Uncomment to set socket buffer size for internode communication
+# Note that when setting this, the buffer size is limited by net.core.wmem_max
+# and when not setting it it is defined by net.ipv4.tcp_wmem
+# internode_recv_buff_size_in_bytes:
+
+# Frame size for thrift (maximum message length).
+thrift_framed_transport_size_in_mb: 15
+
+# Set to true to have Cassandra create a hard link to each sstable
+# flushed or streamed locally in a backups/ subdirectory of the
+# keyspace data. Removing these links is the operator's
+# responsibility.
+incremental_backups: false
+
+# Whether or not to take a snapshot before each compaction. Be
+# careful using this option, since Cassandra won't clean up the
+# snapshots for you. Mostly useful if you're paranoid when there
+# is a data format change.
+snapshot_before_compaction: false
+
+# Whether or not a snapshot is taken of the data before keyspace truncation
+# or dropping of column families. The STRONGLY advised default of true
+# should be used to provide data safety. If you set this flag to false, you will
+# lose data on truncation or drop.
+auto_snapshot: true
+
+# Granularity of the collation index of rows within a partition.
+# Increase if your rows are large, or if you have a very large
+# number of rows per partition. The competing goals are these:
+#
+# - a smaller granularity means more index entries are generated
+#   and looking up rows within the partition by collation column
+#   is faster
+# - but, Cassandra will keep the collation index in memory for hot
+#   rows (as part of the key cache), so a larger granularity means
+#   you can cache more hot rows
+column_index_size_in_kb: 64
+
+# Per sstable indexed key cache entries (the collation index in memory
+# mentioned above) exceeding this size will not be held on heap.
+# This means that only partition information is held on heap and the
+# index entries are read from disk.
+#
+# Note that this size refers to the size of the
+# serialized index information and not the size of the partition.
+column_index_cache_size_in_kb: 2
+
+# Number of simultaneous compactions to allow, NOT including
+# validation "compactions" for anti-entropy repair. Simultaneous
+# compactions can help preserve read performance in a mixed read/write
+# workload, by mitigating the tendency of small sstables to accumulate
+# during a single long-running compaction. The default is usually
+# fine and if you experience problems with compaction running too
+# slowly or too fast, you should look at
+# compaction_throughput_mb_per_sec first.
+#
+# concurrent_compactors defaults to the smaller of (number of disks,
+# number of cores), with a minimum of 2 and a maximum of 8.
+#
+# If your data directories are backed by SSD, you should increase this
+# to the number of cores.
+#concurrent_compactors: 1
+
+# Throttles compaction to the given total throughput across the entire
+# system. The faster you insert data, the faster you need to compact in
+# order to keep the sstable count down, but in general, setting this to
+# 16 to 32 times the rate you are inserting data is more than sufficient.
+# Setting this to 0 disables throttling. Note that this accounts for all types
+# of compaction, including validation compaction.
+compaction_throughput_mb_per_sec: 16
+
+# When compacting, the replacement sstable(s) can be opened before they
+# are completely written, and used in place of the prior sstables for
+# any range that has been written. This helps to smoothly transfer reads
+# between the sstables, reducing page cache churn and keeping hot rows hot
+sstable_preemptive_open_interval_in_mb: 50
+
+# Throttles all outbound streaming file transfers on this node to the
+# given total throughput in Mbps. This is necessary because Cassandra does
+# mostly sequential IO when streaming data during bootstrap or repair, which
+# can lead to saturating the network connection and degrading rpc performance.
+# When unset, the default is 200 Mbps or 25 MB/s.
+# stream_throughput_outbound_megabits_per_sec: 200
+
+# Throttles all streaming file transfer between the datacenters,
+# this setting allows users to throttle inter dc stream throughput in addition
+# to throttling all network stream traffic as configured with
+# stream_throughput_outbound_megabits_per_sec
+# When unset, the default is 200 Mbps or 25 MB/s
+# inter_dc_stream_throughput_outbound_megabits_per_sec: 200
+
+# How long the coordinator should wait for read operations to complete
+read_request_timeout_in_ms: 5000
+# How long the coordinator should wait for seq or index scans to complete
+range_request_timeout_in_ms: 10000
+# How long the coordinator should wait for writes to complete
+write_request_timeout_in_ms: 2000
+# How long the coordinator should wait for counter writes to complete
+counter_write_request_timeout_in_ms: 5000
+# How long a coordinator should continue to retry a CAS operation
+# that contends with other proposals for the same row
+cas_contention_timeout_in_ms: 1000
+# How long the coordinator should wait for truncates to complete
+# (This can be much longer, because unless auto_snapshot is disabled
+# we need to flush first so we can snapshot before removing the data.)
+truncate_request_timeout_in_ms: 60000
+# The default timeout for other, miscellaneous operations
+request_timeout_in_ms: 10000
+
+# How long before a node logs slow queries. Select queries that take longer than
+# this timeout to execute will generate an aggregated log message, so that slow queries
+# can be identified. Set this value to zero to disable slow query logging.
+slow_query_log_timeout_in_ms: 500
+
+# Enable operation timeout information exchange between nodes to accurately
+# measure request timeouts. If disabled, replicas will assume that requests
+# were forwarded to them instantly by the coordinator, which means that
+# under overload conditions we will waste that much extra time processing
+# already-timed-out requests.
+#
+# Warning: before enabling this property make sure ntp is installed
+# and the times are synchronized between the nodes.
+cross_node_timeout: false
+
+# Set keep-alive period for streaming
+# This node will send a keep-alive message periodically with this period.
+# If the node does not receive a keep-alive message from the peer for +# 2 keep-alive cycles the stream session times out and fail +# Default value is 300s (5 minutes), which means stalled stream +# times out in 10 minutes by default +# streaming_keep_alive_period_in_secs: 300 + +# phi value that must be reached for a host to be marked down. +# most users should never need to adjust this. +# phi_convict_threshold: 8 + +# endpoint_snitch -- Set this to a class that implements +# IEndpointSnitch. The snitch has two functions: +# +# - it teaches Cassandra enough about your network topology to route +# requests efficiently +# - it allows Cassandra to spread replicas around your cluster to avoid +# correlated failures. It does this by grouping machines into +# "datacenters" and "racks." Cassandra will do its best not to have +# more than one replica on the same "rack" (which may not actually +# be a physical location) +# +# CASSANDRA WILL NOT ALLOW YOU TO SWITCH TO AN INCOMPATIBLE SNITCH +# ONCE DATA IS INSERTED INTO THE CLUSTER. This would cause data loss. +# This means that if you start with the default SimpleSnitch, which +# locates every node on "rack1" in "datacenter1", your only options +# if you need to add another datacenter are GossipingPropertyFileSnitch +# (and the older PFS). From there, if you want to migrate to an +# incompatible snitch like Ec2Snitch you can do it by adding new nodes +# under Ec2Snitch (which will locate them in a new "datacenter") and +# decommissioning the old ones. +# +# Out of the box, Cassandra provides: +# +# SimpleSnitch: +# Treats Strategy order as proximity. This can improve cache +# locality when disabling read repair. Only appropriate for +# single-datacenter deployments. +# +# GossipingPropertyFileSnitch +# This should be your go-to snitch for production use. The rack +# and datacenter for the local node are defined in +# cassandra-rackdc.properties and propagated to other nodes via +# gossip. If cassandra-topology.properties exists, it is used as a +# fallback, allowing migration from the PropertyFileSnitch. +# +# PropertyFileSnitch: +# Proximity is determined by rack and data center, which are +# explicitly configured in cassandra-topology.properties. +# +# Ec2Snitch: +# Appropriate for EC2 deployments in a single Region. Loads Region +# and Availability Zone information from the EC2 API. The Region is +# treated as the datacenter, and the Availability Zone as the rack. +# Only private IPs are used, so this will not work across multiple +# Regions. +# +# Ec2MultiRegionSnitch: +# Uses public IPs as broadcast_address to allow cross-region +# connectivity. (Thus, you should set seed addresses to the public +# IP as well.) You will need to open the storage_port or +# ssl_storage_port on the public IP firewall. (For intra-Region +# traffic, Cassandra will switch to the private IP after +# establishing a connection.) +# +# RackInferringSnitch: +# Proximity is determined by rack and data center, which are +# assumed to correspond to the 3rd and 2nd octet of each node's IP +# address, respectively. Unless this happens to match your +# deployment conventions, this is best used as an example of +# writing a custom Snitch class and is provided in that spirit. +# +# You can use a custom Snitch by setting this to the full class name +# of the snitch, which will be assumed to be on your classpath. 
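+# (Editor's note, illustrative only: per the guidance above, a production
+# multi-datacenter cluster would typically set
+#   endpoint_snitch: GossipingPropertyFileSnitch
+# whereas this single-node CI setup keeps the SimpleSnitch default below.)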
+endpoint_snitch: SimpleSnitch + +# controls how often to perform the more expensive part of host score +# calculation +dynamic_snitch_update_interval_in_ms: 100 +# controls how often to reset all host scores, allowing a bad host to +# possibly recover +dynamic_snitch_reset_interval_in_ms: 600000 +# if set greater than zero and read_repair_chance is < 1.0, this will allow +# 'pinning' of replicas to hosts in order to increase cache capacity. +# The badness threshold will control how much worse the pinned host has to be +# before the dynamic snitch will prefer other replicas over it. This is +# expressed as a double which represents a percentage. Thus, a value of +# 0.2 means Cassandra would continue to prefer the static snitch values +# until the pinned host was 20% worse than the fastest. +dynamic_snitch_badness_threshold: 0.1 + +# request_scheduler -- Set this to a class that implements +# RequestScheduler, which will schedule incoming client requests +# according to the specific policy. This is useful for multi-tenancy +# with a single Cassandra cluster. +# NOTE: This is specifically for requests from the client and does +# not affect inter node communication. +# org.apache.cassandra.scheduler.NoScheduler - No scheduling takes place +# org.apache.cassandra.scheduler.RoundRobinScheduler - Round robin of +# client requests to a node with a separate queue for each +# request_scheduler_id. The scheduler is further customized by +# request_scheduler_options as described below. +request_scheduler: org.apache.cassandra.scheduler.NoScheduler + +# Scheduler Options vary based on the type of scheduler +# +# NoScheduler +# Has no options +# +# RoundRobin +# throttle_limit +# The throttle_limit is the number of in-flight +# requests per client. Requests beyond +# that limit are queued up until +# running requests can complete. +# The value of 80 here is twice the number of +# concurrent_reads + concurrent_writes. +# default_weight +# default_weight is optional and allows for +# overriding the default which is 1. +# weights +# Weights are optional and will default to 1 or the +# overridden default_weight. The weight translates into how +# many requests are handled during each turn of the +# RoundRobin, based on the scheduler id. +# +# request_scheduler_options: +# throttle_limit: 80 +# default_weight: 5 +# weights: +# Keyspace1: 1 +# Keyspace2: 5 + +# request_scheduler_id -- An identifier based on which to perform +# the request scheduling. Currently the only valid option is keyspace. +# request_scheduler_id: keyspace + +# Enable or disable inter-node encryption +# JVM defaults for supported SSL socket protocols and cipher suites can +# be replaced using custom encryption options. This is not recommended +# unless you have policies in place that dictate certain settings, or +# need to disable vulnerable ciphers or protocols in case the JVM cannot +# be updated. +# FIPS compliant settings can be configured at JVM level and should not +# involve changing encryption settings here: +# https://docs.oracle.com/javase/8/docs/technotes/guides/security/jsse/FIPS.html +# *NOTE* No custom encryption options are enabled at the moment +# The available internode options are : all, none, dc, rack +# +# If set to dc cassandra will encrypt the traffic between the DCs +# If set to rack cassandra will encrypt the traffic between the racks +# +# The passwords used in these options must match the passwords used when generating +# the keystore and truststore. 
For instructions on generating these files, see: +# http://download.oracle.com/javase/6/docs/technotes/guides/security/jsse/JSSERefGuide.html#CreateKeystore +# +server_encryption_options: + internode_encryption: none + keystore: conf/.keystore + keystore_password: cassandra + truststore: conf/.truststore + truststore_password: cassandra + # More advanced defaults below: + # protocol: TLS + # algorithm: SunX509 + # store_type: JKS + # cipher_suites: [TLS_RSA_WITH_AES_128_CBC_SHA,TLS_RSA_WITH_AES_256_CBC_SHA,TLS_DHE_RSA_WITH_AES_128_CBC_SHA,TLS_DHE_RSA_WITH_AES_256_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA] + # require_client_auth: false + # require_endpoint_verification: false + +# enable or disable client/server encryption. +client_encryption_options: + enabled: true + # If enabled and optional is set to true encrypted and unencrypted connections are handled. + optional: false + keystore: /certs/server.jks + keystore_password: my_password + require_client_auth: true + # Set trustore and truststore_password if require_client_auth is true + truststore: /certs/truststore.jks + truststore_password: my_password + # More advanced defaults below: + protocol: TLS + store_type: JKS + cipher_suites: [TLS_RSA_WITH_AES_128_CBC_SHA,TLS_RSA_WITH_AES_256_CBC_SHA,TLS_DHE_RSA_WITH_AES_128_CBC_SHA,TLS_DHE_RSA_WITH_AES_256_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA] + +# internode_compression controls whether traffic between nodes is +# compressed. +# Can be: +# +# all +# all traffic is compressed +# +# dc +# traffic between different datacenters is compressed +# +# none +# nothing is compressed. +internode_compression: dc + +# Enable or disable tcp_nodelay for inter-dc communication. +# Disabling it will result in larger (but fewer) network packets being sent, +# reducing overhead from the TCP protocol itself, at the cost of increasing +# latency if you block for cross-datacenter responses. +inter_dc_tcp_nodelay: false + +# TTL for different trace types used during logging of the repair process. +tracetype_query_ttl: 86400 +tracetype_repair_ttl: 604800 + +# By default, Cassandra logs GC Pauses greater than 200 ms at INFO level +# This threshold can be adjusted to minimize logging if necessary +# gc_log_threshold_in_ms: 200 + +# If unset, all GC Pauses greater than gc_log_threshold_in_ms will log at +# INFO level +# UDFs (user defined functions) are disabled by default. +# As of Cassandra 3.0 there is a sandbox in place that should prevent execution of evil code. +enable_user_defined_functions: false + +# Enables scripted UDFs (JavaScript UDFs). +# Java UDFs are always enabled, if enable_user_defined_functions is true. +# Enable this option to be able to use UDFs with "language javascript" or any custom JSR-223 provider. +# This option has no effect, if enable_user_defined_functions is false. +enable_scripted_user_defined_functions: false + +# Enables materialized view creation on this node. +# Materialized views are considered experimental and are not recommended for production use. +enable_materialized_views: true + +# The default Windows kernel timer and scheduling resolution is 15.6ms for power conservation. +# Lowering this value on Windows can provide much tighter latency and better throughput, however +# some virtualized environments may see a negative performance impact from changing this setting +# below their system default. The sysinternals 'clockres' tool can confirm your system's default +# setting. 
+windows_timer_interval: 1
+
+
+# Enables encrypting data at-rest (on disk). Different key providers can be plugged in, but the default reads from
+# a JCE-style keystore. A single keystore can hold multiple keys, but the one referenced by
+# the "key_alias" is the only key that will be used for encrypt operations; previously used keys
+# can still (and should!) be in the keystore and will be used on decrypt operations
+# (to handle the case of key rotation).
+#
+# It is strongly recommended to download and install Java Cryptography Extension (JCE)
+# Unlimited Strength Jurisdiction Policy Files for your version of the JDK.
+# (current link: http://www.oracle.com/technetwork/java/javase/downloads/jce8-download-2133166.html)
+#
+# Currently, only the following file types are supported for transparent data encryption, although
+# more are coming in future cassandra releases: commitlog, hints
+transparent_data_encryption_options:
+    enabled: false
+    chunk_length_kb: 64
+    cipher: AES/CBC/PKCS5Padding
+    key_alias: testing:1
+    # CBC IV length for AES needs to be 16 bytes (which is also the default size)
+    # iv_length: 16
+    key_provider:
+      - class_name: org.apache.cassandra.security.JKSKeyProvider
+        parameters:
+          - keystore: conf/.keystore
+            keystore_password: cassandra
+            store_type: JCEKS
+            key_password: cassandra
+
+
+#####################
+# SAFETY THRESHOLDS #
+#####################
+
+# When executing a scan, within or across a partition, we need to keep the
+# tombstones seen in memory so we can return them to the coordinator, which
+# will use them to make sure other replicas also know about the deleted rows.
+# With workloads that generate a lot of tombstones, this can cause performance
+# problems and even exhaust the server heap.
+# (http://www.datastax.com/dev/blog/cassandra-anti-patterns-queues-and-queue-like-datasets)
+# Adjust the thresholds here if you understand the dangers and want to
+# scan more tombstones anyway. These thresholds may also be adjusted at runtime
+# using the StorageService mbean.
+tombstone_warn_threshold: 1000
+tombstone_failure_threshold: 100000
+
+# Log WARN on any multiple-partition batch size exceeding this value. 5kb per batch by default.
+# Caution should be taken on increasing the size of this threshold as it can lead to node instability.
+batch_size_warn_threshold_in_kb: 5
+
+# Fail any multiple-partition batch exceeding this value. 50kb (10x warn threshold) by default.
+batch_size_fail_threshold_in_kb: 50
+
+# Log WARN on any batches not of type LOGGED that span across more partitions than this limit
+unlogged_batch_across_partitions_warn_threshold: 10
+
+# Log a warning when compacting partitions larger than this value
+compaction_large_partition_warning_threshold_mb: 100
+
+# GC Pauses greater than gc_warn_threshold_in_ms will be logged at WARN level
+# Adjust the threshold based on your application throughput requirement
+# By default, Cassandra logs GC Pauses greater than 200 ms at INFO level
+gc_warn_threshold_in_ms: 1000
+
+# Maximum size of any value in SSTables. Safety measure to detect SSTable corruption
+# early. Any value size larger than this threshold will result in marking an SSTable
+# as corrupted. This should be positive and less than 2048.
+# max_value_size_in_mb: 256
+
+# Back-pressure settings #
+# If enabled, the coordinator will apply the back-pressure strategy specified below to each mutation
+# sent to replicas, with the aim of reducing pressure on overloaded replicas.
+back_pressure_enabled: false
+# The back-pressure strategy applied.
+# The default implementation, RateBasedBackPressure, takes three arguments:
+# high ratio, factor, and flow type, and uses the ratio between incoming mutation responses and outgoing mutation requests.
+# If below high ratio, outgoing mutations are rate limited according to the incoming rate decreased by the given factor;
+# if above high ratio, the rate limiting is increased by the given factor;
+# such factor is usually best configured between 1 and 10, use larger values for a faster recovery
+# at the expense of potentially more dropped mutations;
+# the rate limiting is applied according to the flow type: if FAST, it's rate limited at the speed of the fastest replica,
+# if SLOW at the speed of the slowest one.
+# New strategies can be added. Implementors need to implement org.apache.cassandra.net.BackpressureStrategy and
+# provide a public constructor accepting a Map<String, Object>.
+back_pressure_strategy:
+    - class_name: org.apache.cassandra.net.RateBasedBackPressure
+      parameters:
+        - high_ratio: 0.90
+          factor: 5
+          flow: FAST
+
+# Coalescing Strategies #
+# Coalescing multiple messages turns out to significantly boost message processing throughput (think doubling or more).
+# On bare metal, the floor for packet processing throughput is high enough that many applications won't notice, but in
+# virtualized environments, the point at which an application can be bound by network packet processing can be
+# surprisingly low compared to the throughput of task processing that is possible inside a VM. It's not that bare metal
+# doesn't benefit from coalescing messages, it's that the number of packets a bare metal network interface can process
+# is sufficient for many applications such that no load starvation is experienced even without coalescing.
+# There are other benefits to coalescing network messages that are harder to isolate with a simple metric like messages
+# per second. By coalescing multiple tasks together, a network thread can process multiple messages for the cost of one
+# trip to read from a socket, and all the task submission work can be done at the same time reducing context switching
+# and increasing cache friendliness of network message processing.
+# See CASSANDRA-8692 for details.
+
+# Strategy to use for coalescing messages in OutboundTcpConnection.
+# Can be fixed, movingaverage, timehorizon, disabled (default).
+# You can also specify a subclass of CoalescingStrategies.CoalescingStrategy by name.
+# otc_coalescing_strategy: DISABLED
+
+# How many microseconds to wait for coalescing. For fixed strategy this is the amount of time after the first
+# message is received before it will be sent with any accompanying messages. For moving average this is the
+# maximum amount of time that will be waited as well as the interval at which messages must arrive on average
+# for coalescing to be enabled.
+# otc_coalescing_window_us: 200
+
+# Do not try to coalesce messages if we already got that many messages. This should be more than 2 and less than 128.
+# otc_coalescing_enough_coalesced_messages: 8
+
+# How many milliseconds to wait between two expiration runs on the backlog (queue) of the OutboundTcpConnection.
+# Expiration is done if messages are piling up in the backlog. Droppable messages are expired to free the memory
+# taken by expired messages. The interval should be between 0 and 1000, and in most installations the default value
+# will be appropriate. A smaller value could potentially expire messages slightly sooner at the expense of more CPU
+# time and queue contention while iterating the backlog of messages.
+# An interval of 0 disables any wait time, which is the behavior of former Cassandra versions.
+#
+# otc_backlog_expiration_interval_ms: 200
diff --git a/.ci/docker-compose-file/docker-compose-cassandra.yaml b/.ci/docker-compose-file/docker-compose-cassandra.yaml
index f7143f471..918a61037 100644
--- a/.ci/docker-compose-file/docker-compose-cassandra.yaml
+++ b/.ci/docker-compose-file/docker-compose-cassandra.yaml
@@ -1,32 +1,38 @@
 version: '3.9'
 
+x-cassandra: &cassandra
+  restart: always
+  image: cassandra:${CASSANDRA_TAG:-3.11.6}
+  environment:
+    CASSANDRA_BROADCAST_ADDRESS: "1.2.3.4"
+    CASSANDRA_RPC_ADDRESS: "0.0.0.0"
+    HEAP_NEWSIZE: "128M"
+    MAX_HEAP_SIZE: "2048M"
+  #ports:
+  #  - "9042:9042"
+  #  - "9142:9142"
+  command:
+    - /bin/bash
+    - -c
+    - |
+      /opt/cassandra/bin/cassandra -f -R > /cassandra.log &
+      /opt/cassandra/bin/cqlsh -u cassandra -p cassandra -e "CREATE KEYSPACE mqtt WITH REPLICATION = { 'class':'SimpleStrategy','replication_factor':1};"
+      while [[ $$? -ne 0 ]];do sleep 5; /opt/cassandra/bin/cqlsh -u cassandra -p cassandra -e "CREATE KEYSPACE mqtt WITH REPLICATION = { 'class':'SimpleStrategy','replication_factor':1};"; done
+      /opt/cassandra/bin/cqlsh -u cassandra -p cassandra -e "describe keyspaces;"
+      tail -f /cassandra.log
+  networks:
+    - emqx_bridge
+
 services:
   cassandra_server:
+    <<: *cassandra
     container_name: cassandra
-    build:
-      context: ./cassandra
-      args:
-        CASSANDRA_TAG: ${CASSANDRA_TAG}
-    image: emqx-cassandra
-    restart: always
-    environment:
-      CASSANDRA_BROADCAST_ADDRESS: "1.2.3.4"
-      CASSANDRA_RPC_ADDRESS: "0.0.0.0"
-      HEAP_NEWSIZE: "128M"
-      MAX_HEAP_SIZE: "2048M"
     volumes:
       - ./certs:/certs
-    #ports:
-    #  - "9042:9042"
-    #  - "9142:9142"
-    command:
-      - /bin/bash
-      - -c
-      - |
-        /opt/cassandra/bin/cassandra -f -R > /cassandra.log &
-        /opt/cassandra/bin/cqlsh -u cassandra -p cassandra -e "CREATE KEYSPACE mqtt WITH REPLICATION = { 'class':'SimpleStrategy','replication_factor':1};"
-        while [[ $$?
-ne 0 ]];do sleep 5; /opt/cassandra/bin/cqlsh -u cassandra -p cassandra -e "CREATE KEYSPACE mqtt WITH REPLICATION = { 'class':'SimpleStrategy','replication_factor':1};"; done - /opt/cassandra/bin/cqlsh -u cassandra -p cassandra -e "describe keyspaces;" - tail -f /cassandra.log - networks: - - emqx_bridge + - ./cassandra/cassandra.yaml:/etc/cassandra/cassandra.yaml + cassandra_noauth_server: + <<: *cassandra + container_name: cassandra_noauth + volumes: + - ./certs:/certs + - ./cassandra/cassandra_noauth.yaml:/etc/cassandra/cassandra.yaml diff --git a/.ci/docker-compose-file/docker-compose-kafka.yaml b/.ci/docker-compose-file/docker-compose-kafka.yaml index 18ef3991c..f5bdb24ec 100644 --- a/.ci/docker-compose-file/docker-compose-kafka.yaml +++ b/.ci/docker-compose-file/docker-compose-kafka.yaml @@ -18,7 +18,7 @@ services: - /tmp/emqx-ci/emqx-shared-secret:/var/lib/secret kdc: hostname: kdc.emqx.net - image: ghcr.io/emqx/emqx-builder/5.1-3:1.14.5-25.3.2-1-ubuntu20.04 + image: ghcr.io/emqx/emqx-builder/5.1-4:1.14.5-25.3.2-2-ubuntu20.04 container_name: kdc.emqx.net expose: - 88 # kdc diff --git a/.ci/docker-compose-file/docker-compose.yaml b/.ci/docker-compose-file/docker-compose.yaml index 504358419..9adbef02e 100644 --- a/.ci/docker-compose-file/docker-compose.yaml +++ b/.ci/docker-compose-file/docker-compose.yaml @@ -3,7 +3,7 @@ version: '3.9' services: erlang: container_name: erlang - image: ${DOCKER_CT_RUNNER_IMAGE:-ghcr.io/emqx/emqx-builder/5.1-3:1.14.5-25.3.2-1-ubuntu20.04} + image: ${DOCKER_CT_RUNNER_IMAGE:-ghcr.io/emqx/emqx-builder/5.1-4:1.14.5-25.3.2-2-ubuntu20.04} env_file: - conf.env environment: diff --git a/.github/actions/package-macos/action.yaml b/.github/actions/package-macos/action.yaml index 6b47ceafa..25edcb5f5 100644 --- a/.github/actions/package-macos/action.yaml +++ b/.github/actions/package-macos/action.yaml @@ -3,7 +3,7 @@ inputs: profile: # emqx, emqx-enterprise required: true type: string - otp: # 25.3.2-1 + otp: # 25.3.2-2 required: true type: string os: diff --git a/.github/workflows/_pr_entrypoint.yaml b/.github/workflows/_pr_entrypoint.yaml index 87c4d6145..7de9a64fd 100644 --- a/.github/workflows/_pr_entrypoint.yaml +++ b/.github/workflows/_pr_entrypoint.yaml @@ -17,7 +17,7 @@ env: jobs: sanity-checks: runs-on: ${{ github.repository_owner == 'emqx' && 'aws-amd64' || 'ubuntu-22.04' }} - container: "ghcr.io/emqx/emqx-builder/5.1-3:1.14.5-25.3.2-1-ubuntu22.04" + container: "ghcr.io/emqx/emqx-builder/5.1-4:1.14.5-25.3.2-2-ubuntu22.04" outputs: ct-matrix: ${{ steps.matrix.outputs.ct-matrix }} ct-host: ${{ steps.matrix.outputs.ct-host }} @@ -25,9 +25,9 @@ jobs: version-emqx: ${{ steps.matrix.outputs.version-emqx }} version-emqx-enterprise: ${{ steps.matrix.outputs.version-emqx-enterprise }} runner: ${{ github.repository_owner == 'emqx' && 'aws-amd64' || 'ubuntu-22.04' }} - builder: "ghcr.io/emqx/emqx-builder/5.1-3:1.14.5-25.3.2-1-ubuntu22.04" - builder_vsn: "5.1-3" - otp_vsn: "25.3.2-1" + builder: "ghcr.io/emqx/emqx-builder/5.1-4:1.14.5-25.3.2-2-ubuntu22.04" + builder_vsn: "5.1-4" + otp_vsn: "25.3.2-2" elixir_vsn: "1.14.5" steps: @@ -93,13 +93,13 @@ jobs: MATRIX="$(echo "${APPS}" | jq -c ' [ (.[] | select(.profile == "emqx") | . + { - builder: "5.1-3", - otp: "25.3.2-1", + builder: "5.1-4", + otp: "25.3.2-2", elixir: "1.14.5" }), (.[] | select(.profile == "emqx-enterprise") | . 
+ { - builder: "5.1-3", - otp: ["25.3.2-1"][], + builder: "5.1-4", + otp: ["25.3.2-2"][], elixir: "1.14.5" }) ] diff --git a/.github/workflows/_push-entrypoint.yaml b/.github/workflows/_push-entrypoint.yaml index bc3bc486e..afdf2a050 100644 --- a/.github/workflows/_push-entrypoint.yaml +++ b/.github/workflows/_push-entrypoint.yaml @@ -21,7 +21,7 @@ env: jobs: prepare: runs-on: ${{ github.repository_owner == 'emqx' && 'aws-amd64' || 'ubuntu-22.04' }} - container: 'ghcr.io/emqx/emqx-builder/5.1-3:1.14.5-25.3.2-1-ubuntu22.04' + container: 'ghcr.io/emqx/emqx-builder/5.1-4:1.14.5-25.3.2-2-ubuntu22.04' outputs: profile: ${{ steps.parse-git-ref.outputs.profile }} release: ${{ steps.parse-git-ref.outputs.release }} @@ -31,9 +31,9 @@ jobs: ct-host: ${{ steps.matrix.outputs.ct-host }} ct-docker: ${{ steps.matrix.outputs.ct-docker }} runner: ${{ github.repository_owner == 'emqx' && 'aws-amd64' || 'ubuntu-22.04' }} - builder: 'ghcr.io/emqx/emqx-builder/5.1-3:1.14.5-25.3.2-1-ubuntu22.04' - builder_vsn: '5.1-3' - otp_vsn: '25.3.2-1' + builder: 'ghcr.io/emqx/emqx-builder/5.1-4:1.14.5-25.3.2-2-ubuntu22.04' + builder_vsn: '5.1-4' + otp_vsn: '25.3.2-2' elixir_vsn: '1.14.5' steps: @@ -64,13 +64,13 @@ jobs: MATRIX="$(echo "${APPS}" | jq -c ' [ (.[] | select(.profile == "emqx") | . + { - builder: "5.1-3", - otp: "25.3.2-1", + builder: "5.1-4", + otp: "25.3.2-2", elixir: "1.14.5" }), (.[] | select(.profile == "emqx-enterprise") | . + { - builder: "5.1-3", - otp: ["25.3.2-1"][], + builder: "5.1-4", + otp: ["25.3.2-2"][], elixir: "1.14.5" }) ] diff --git a/.github/workflows/build_and_push_docker_images.yaml b/.github/workflows/build_and_push_docker_images.yaml index b2bfe735b..3f568e430 100644 --- a/.github/workflows/build_and_push_docker_images.yaml +++ b/.github/workflows/build_and_push_docker_images.yaml @@ -61,7 +61,7 @@ on: otp_vsn: required: false type: string - default: '25.3.2-1' + default: '25.3.2-2' elixir_vsn: required: false type: string @@ -69,7 +69,7 @@ on: builder_vsn: required: false type: string - default: '5.1-3' + default: '5.1-4' runner: required: false type: string diff --git a/.github/workflows/build_packages.yaml b/.github/workflows/build_packages.yaml index d33d46f11..d482d2c0e 100644 --- a/.github/workflows/build_packages.yaml +++ b/.github/workflows/build_packages.yaml @@ -57,7 +57,7 @@ on: otp_vsn: required: false type: string - default: '25.3.2-1' + default: '25.3.2-2' elixir_vsn: required: false type: string @@ -69,7 +69,7 @@ on: builder_vsn: required: false type: string - default: '5.1-3' + default: '5.1-4' jobs: windows: diff --git a/.github/workflows/build_packages_cron.yaml b/.github/workflows/build_packages_cron.yaml index 431c4f5c4..a67ab81d2 100644 --- a/.github/workflows/build_packages_cron.yaml +++ b/.github/workflows/build_packages_cron.yaml @@ -24,7 +24,7 @@ jobs: - ['emqx-enterprise', 'release-51'] - ['emqx-enterprise', 'release-52'] otp: - - 25.3.2-1 + - 25.3.2-2 arch: - amd64 os: @@ -32,7 +32,7 @@ jobs: - ubuntu22.04 - amzn2023 builder: - - 5.1-3 + - 5.1-4 elixir: - 1.14.5 @@ -99,7 +99,7 @@ jobs: branch: - master otp: - - 25.3.2-1 + - 25.3.2-2 os: - macos-13 - macos-12-arm64 diff --git a/.github/workflows/build_slim_packages.yaml b/.github/workflows/build_slim_packages.yaml index b7ba78ef4..8e13ec0a1 100644 --- a/.github/workflows/build_slim_packages.yaml +++ b/.github/workflows/build_slim_packages.yaml @@ -34,15 +34,15 @@ on: builder: required: false type: string - default: 'ghcr.io/emqx/emqx-builder/5.1-3:1.14.5-25.3.2-1-ubuntu22.04' + default: 
'ghcr.io/emqx/emqx-builder/5.1-4:1.14.5-25.3.2-2-ubuntu22.04' builder_vsn: required: false type: string - default: '5.1-3' + default: '5.1-4' otp_vsn: required: false type: string - default: '25.3.2-1' + default: '25.3.2-2' elixir_vsn: required: false type: string @@ -58,8 +58,8 @@ jobs: fail-fast: false matrix: profile: - - ["emqx", "25.3.2-1", "ubuntu20.04", "elixir"] - - ["emqx-enterprise", "25.3.2-1", "ubuntu20.04", "erlang"] + - ["emqx", "25.3.2-2", "ubuntu20.04", "elixir"] + - ["emqx-enterprise", "25.3.2-2", "ubuntu20.04", "erlang"] container: "ghcr.io/emqx/emqx-builder/${{ inputs.builder_vsn }}:${{ inputs.elixir_vsn }}-${{ matrix.profile[1] }}-${{ matrix.profile[2] }}" diff --git a/.github/workflows/codeql.yaml b/.github/workflows/codeql.yaml index 6d4cc3dc4..a0b701d17 100644 --- a/.github/workflows/codeql.yaml +++ b/.github/workflows/codeql.yaml @@ -18,7 +18,7 @@ jobs: contents: read security-events: write container: - image: ghcr.io/emqx/emqx-builder/5.1-1:1.14.5-25.3.2-1-ubuntu22.04 + image: ghcr.io/emqx/emqx-builder/5.1-4:1.14.5-25.3.2-2-ubuntu22.04 strategy: fail-fast: false diff --git a/.github/workflows/performance_test.yaml b/.github/workflows/performance_test.yaml index 10b040271..224cfb0b3 100644 --- a/.github/workflows/performance_test.yaml +++ b/.github/workflows/performance_test.yaml @@ -23,7 +23,7 @@ jobs: prepare: runs-on: ubuntu-latest if: github.repository_owner == 'emqx' - container: ghcr.io/emqx/emqx-builder/5.1-3:1.14.5-25.3.2-1-ubuntu20.04 + container: ghcr.io/emqx/emqx-builder/5.1-4:1.14.5-25.3.2-2-ubuntu20.04 outputs: BENCH_ID: ${{ steps.prepare.outputs.BENCH_ID }} PACKAGE_FILE: ${{ steps.package_file.outputs.PACKAGE_FILE }} diff --git a/.github/workflows/run_test_cases.yaml b/.github/workflows/run_test_cases.yaml index 48e551612..82b2bbeb9 100644 --- a/.github/workflows/run_test_cases.yaml +++ b/.github/workflows/run_test_cases.yaml @@ -29,6 +29,7 @@ env: jobs: eunit_and_proper: runs-on: ${{ inputs.runner }} + name: "eunit_and_proper (${{ matrix.profile }})" strategy: fail-fast: false matrix: @@ -69,6 +70,7 @@ jobs: ct_docker: runs-on: ${{ inputs.runner }} + name: "ct_docker (${{ matrix.app }}-${{ matrix.suitegroup }})" strategy: fail-fast: false matrix: @@ -116,6 +118,7 @@ jobs: ct: runs-on: ${{ inputs.runner }} + name: "ct (${{ matrix.app }}-${{ matrix.suitegroup }})" strategy: fail-fast: false matrix: @@ -155,6 +158,17 @@ jobs: name: logs-${{ matrix.profile }}-${{ matrix.prefix }}-${{ matrix.otp }}-sg${{ matrix.suitegroup }} path: _build/test/logs + tests_passed: + needs: + - eunit_and_proper + - ct + - ct_docker + runs-on: ${{ inputs.runner }} + strategy: + fail-fast: false + steps: + - run: echo "All tests passed" + make_cover: needs: - eunit_and_proper diff --git a/.github/workflows/static_checks.yaml b/.github/workflows/static_checks.yaml index 3b32a36b4..21a753a37 100644 --- a/.github/workflows/static_checks.yaml +++ b/.github/workflows/static_checks.yaml @@ -23,6 +23,7 @@ env: jobs: static_checks: runs-on: ${{ inputs.runner }} + name: "static_checks (${{ matrix.profile }})" strategy: fail-fast: false matrix: diff --git a/.tool-versions b/.tool-versions index 3a2251dc8..a988325fa 100644 --- a/.tool-versions +++ b/.tool-versions @@ -1,2 +1,2 @@ -erlang 25.3.2-1 +erlang 25.3.2-2 elixir 1.14.5-otp-25 diff --git a/Makefile b/Makefile index 32eedc4e2..7377a6ffc 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ REBAR = $(CURDIR)/rebar3 BUILD = $(CURDIR)/build SCRIPTS = $(CURDIR)/scripts export EMQX_RELUP ?= true -export EMQX_DEFAULT_BUILDER = 
ghcr.io/emqx/emqx-builder/5.1-3:1.14.5-25.3.2-1-debian11 +export EMQX_DEFAULT_BUILDER = ghcr.io/emqx/emqx-builder/5.1-4:1.14.5-25.3.2-2-debian11 export EMQX_DEFAULT_RUNNER = debian:11-slim export EMQX_REL_FORM ?= tgz export QUICER_DOWNLOAD_FROM_RELEASE = 1 @@ -296,7 +296,7 @@ $(foreach tt,$(ALL_ELIXIR_TGZS),$(eval $(call gen-elixir-tgz-target,$(tt)))) .PHONY: fmt fmt: $(REBAR) - @$(SCRIPTS)/erlfmt -w '{apps,lib-ee}/*/{src,include,priv,test}/**/*.{erl,hrl,app.src,eterm}' + @$(SCRIPTS)/erlfmt -w '{apps,lib-ee}/*/{src,include,priv,test,integration_test}/**/*.{erl,hrl,app.src,eterm}' @$(SCRIPTS)/erlfmt -w 'rebar.config.erl' @mix format diff --git a/apps/emqx/include/emqx.hrl b/apps/emqx/include/emqx.hrl index 2cba12365..bc1d66ca2 100644 --- a/apps/emqx/include/emqx.hrl +++ b/apps/emqx/include/emqx.hrl @@ -23,7 +23,6 @@ -define(SHARED_SUB_SHARD, emqx_shared_sub_shard). -define(CM_SHARD, emqx_cm_shard). -define(ROUTE_SHARD, route_shard). --define(PERSISTENT_SESSION_SHARD, emqx_persistent_session_shard). %% Banner %%-------------------------------------------------------------------- @@ -92,7 +91,7 @@ -record(route, { topic :: binary(), - dest :: node() | {binary(), node()} | emqx_session:sessionID() + dest :: node() | {binary(), node()} | emqx_session:session_id() }). %%-------------------------------------------------------------------- diff --git a/apps/emqx/include/emqx_channel.hrl b/apps/emqx/include/emqx_channel.hrl index be2448a20..53abcafd6 100644 --- a/apps/emqx/include/emqx_channel.hrl +++ b/apps/emqx/include/emqx_channel.hrl @@ -41,4 +41,9 @@ will_msg ]). +-define(REPLY_OUTGOING(Packets), {outgoing, Packets}). +-define(REPLY_CONNACK(Packet), {connack, Packet}). +-define(REPLY_EVENT(StateOrEvent), {event, StateOrEvent}). +-define(REPLY_CLOSE(Reason), {close, Reason}). + -define(EXPIRE_INTERVAL_INFINITE, 4294967295000). diff --git a/apps/emqx/include/emqx_release.hrl b/apps/emqx/include/emqx_release.hrl index c8326fac6..ff1d7c715 100644 --- a/apps/emqx/include/emqx_release.hrl +++ b/apps/emqx/include/emqx_release.hrl @@ -32,7 +32,7 @@ %% `apps/emqx/src/bpapi/README.md' %% Opensource edition --define(EMQX_RELEASE_CE, "5.1.5-build.3"). +-define(EMQX_RELEASE_CE, "5.1.6"). %% Enterprise edition -define(EMQX_RELEASE_EE, "5.2.0"). diff --git a/apps/emqx/include/emqx_session.hrl b/apps/emqx/include/emqx_session.hrl new file mode 100644 index 000000000..304f92d58 --- /dev/null +++ b/apps/emqx/include/emqx_session.hrl @@ -0,0 +1,59 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2017-2023 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- + +-ifndef(EMQX_SESSION_HRL). +-define(EMQX_SESSION_HRL, true). + +-record(session, { + %% Client's id + clientid :: emqx_types:clientid(), + id :: emqx_session:session_id(), + %% Is this session a persistent session i.e. 
was it started with Session-Expiry > 0? + is_persistent :: boolean(), + %% Client’s Subscriptions. + subscriptions :: map(), + %% Max subscriptions allowed + max_subscriptions :: non_neg_integer() | infinity, + %% Upgrade QoS? + upgrade_qos :: boolean(), + %% Client <- Broker: QoS1/2 messages sent to the client but + %% not yet acked. + inflight :: emqx_inflight:inflight(), + %% All QoS1/2 messages published while the client is disconnected, + %% or QoS1/2 messages pending transmission to the Client. + %% + %% Optionally, QoS0 messages pending transmission to the Client. + mqueue :: emqx_mqueue:mqueue(), + %% Next packet id of the session + next_pkt_id = 1 :: emqx_types:packet_id(), + %% Retry interval for redelivering QoS1/2 messages (Unit: millisecond) + retry_interval :: timeout(), + %% Client -> Broker: QoS2 messages received from the client, but + %% have not been completely acknowledged + awaiting_rel :: map(), + %% Maximum number of awaiting QoS2 messages allowed + max_awaiting_rel :: non_neg_integer() | infinity, + %% Awaiting PUBREL Timeout (Unit: millisecond) + await_rel_timeout :: timeout(), + %% Created at + created_at :: pos_integer(), + %% Topic filter to iterator ID mapping. + %% Note: we shouldn't serialize this when persisting sessions, as this information + %% also exists in the `?ITERATOR_REF_TAB' table. + iterators = #{} :: #{emqx_topic:topic() => emqx_ds:iterator_id()} +}). + +-endif. diff --git a/apps/emqx/integration_test/emqx_ds_SUITE.erl b/apps/emqx/integration_test/emqx_ds_SUITE.erl new file mode 100644 index 000000000..cbfa5c185 --- /dev/null +++ b/apps/emqx/integration_test/emqx_ds_SUITE.erl @@ -0,0 +1,340 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved. +%%-------------------------------------------------------------------- +-module(emqx_ds_SUITE). + +-compile(export_all). +-compile(nowarn_export_all). + +-include_lib("stdlib/include/assert.hrl"). +-include_lib("common_test/include/ct.hrl"). +-include_lib("snabbkaffe/include/snabbkaffe.hrl"). +-include_lib("emqx/include/emqx_mqtt.hrl"). + +-define(DS_SHARD, <<"local">>). +-define(ITERATOR_REF_TAB, emqx_ds_iterator_ref). + +%%------------------------------------------------------------------------------ +%% CT boilerplate +%%------------------------------------------------------------------------------ + +all() -> + emqx_common_test_helpers:all(?MODULE). + +init_per_suite(Config) -> + TCApps = emqx_cth_suite:start( + app_specs(), + #{work_dir => emqx_cth_suite:work_dir(Config)} + ), + [{tc_apps, TCApps} | Config]. + +end_per_suite(Config) -> + TCApps = ?config(tc_apps, Config), + emqx_cth_suite:stop(TCApps), + ok. + +init_per_testcase(TestCase, Config) when + TestCase =:= t_session_subscription_idempotency; + TestCase =:= t_session_unsubscription_idempotency +-> + Cluster = cluster(#{n => 1}), + ClusterOpts = #{work_dir => emqx_cth_suite:work_dir(TestCase, Config)}, + NodeSpecs = emqx_cth_cluster:mk_nodespecs(Cluster, ClusterOpts), + Nodes = emqx_cth_cluster:start(Cluster, ClusterOpts), + [ + {cluster, Cluster}, + {node_specs, NodeSpecs}, + {cluster_opts, ClusterOpts}, + {nodes, Nodes} + | Config + ]; +init_per_testcase(_TestCase, Config) -> + Config.
+ +end_per_testcase(TestCase, Config) when + TestCase =:= t_session_subscription_idempotency; + TestCase =:= t_session_unsubscription_idempotency +-> + Nodes = ?config(nodes, Config), + ok = emqx_cth_cluster:stop(Nodes), + ok; +end_per_testcase(_TestCase, _Config) -> + ok. + +%%------------------------------------------------------------------------------ +%% Helper fns +%%------------------------------------------------------------------------------ + +cluster(#{n := N}) -> + Node1 = ds_SUITE1, + Spec = #{ + role => core, + join_to => emqx_cth_cluster:node_name(Node1), + apps => app_specs() + }, + [ + {Node1, Spec} + | lists:map( + fun(M) -> + Name = binary_to_atom(<<"ds_SUITE", (integer_to_binary(M))/binary>>), + {Name, Spec} + end, + lists:seq(2, N) + ) + ]. + +app_specs() -> + [ + emqx_durable_storage, + {emqx, #{ + before_start => fun() -> + emqx_app:set_config_loader(?MODULE) + end, + config => #{persistent_session_store => #{ds => true}}, + override_env => [{boot_modules, [broker, listeners]}] + }} + ]. + +get_mqtt_port(Node, Type) -> + {_IP, Port} = erpc:call(Node, emqx_config, get, [[listeners, Type, default, bind]]), + Port. + +get_all_iterator_refs(Node) -> + erpc:call(Node, mnesia, dirty_all_keys, [?ITERATOR_REF_TAB]). + +get_all_iterator_ids(Node) -> + Fn = fun(K, _V, Acc) -> [K | Acc] end, + erpc:call(Node, fun() -> + emqx_ds_storage_layer:foldl_iterator_prefix(?DS_SHARD, <<>>, Fn, []) + end). + +get_session_iterators(Node, ClientId) -> + erpc:call(Node, fun() -> + [ConnPid] = emqx_cm:lookup_channels(ClientId), + emqx_connection:info({channel, {session, iterators}}, sys:get_state(ConnPid)) + end). + +wait_nodeup(Node) -> + ?retry( + _Sleep0 = 500, + _Attempts0 = 50, + pong = net_adm:ping(Node) + ). + +wait_gen_rpc_down(_NodeSpec = #{apps := Apps}) -> + #{override_env := Env} = proplists:get_value(gen_rpc, Apps), + Port = proplists:get_value(tcp_server_port, Env), + ?retry( + _Sleep0 = 500, + _Attempts0 = 50, + false = emqx_common_test_helpers:is_tcp_server_available("127.0.0.1", Port) + ). + +%%------------------------------------------------------------------------------ +%% Testcases +%%------------------------------------------------------------------------------ + +t_session_subscription_idempotency(Config) -> + [Node1Spec | _] = ?config(node_specs, Config), + [Node1] = ?config(nodes, Config), + Port = get_mqtt_port(Node1, tcp), + SubTopicFilter = <<"t/+">>, + ClientId = <<"myclientid">>, + ?check_trace( + begin + ?force_ordering( + #{?snk_kind := persistent_session_ds_iterator_added}, + _NEvents0 = 1, + #{?snk_kind := will_restart_node}, + _Guard0 = true + ), + ?force_ordering( + #{?snk_kind := restarted_node}, + _NEvents1 = 1, + #{?snk_kind := persistent_session_ds_open_iterators, ?snk_span := start}, + _Guard1 = true + ), + + spawn_link(fun() -> + ?tp(will_restart_node, #{}), + ct:pal("restarting node ~p", [Node1]), + true = monitor_node(Node1, true), + ok = erpc:call(Node1, init, restart, []), + receive + {nodedown, Node1} -> + ok + after 10_000 -> + ct:fail("node ~p didn't stop", [Node1]) + end, + ct:pal("waiting for nodeup ~p", [Node1]), + wait_nodeup(Node1), + wait_gen_rpc_down(Node1Spec), + ct:pal("restarting apps on ~p", [Node1]), + Apps = maps:get(apps, Node1Spec), + ok = erpc:call(Node1, emqx_cth_suite, load_apps, [Apps]), + _ = erpc:call(Node1, emqx_cth_suite, start_apps, [Apps, Node1Spec]), + %% have to re-inject this so that we may stop the node successfully at the + %% end....
+ ok = emqx_cth_cluster:set_node_opts(Node1, Node1Spec), + ok = snabbkaffe:forward_trace(Node1), + ct:pal("node ~p restarted", [Node1]), + ?tp(restarted_node, #{}), + ok + end), + + ct:pal("starting 1"), + {ok, Client0} = emqtt:start_link([ + {port, Port}, + {clientid, ClientId}, + {proto_ver, v5} + ]), + {ok, _} = emqtt:connect(Client0), + ct:pal("subscribing 1"), + process_flag(trap_exit, true), + catch emqtt:subscribe(Client0, SubTopicFilter, qos2), + receive + {'EXIT', {shutdown, _}} -> + ok + after 0 -> ok + end, + process_flag(trap_exit, false), + + {ok, _} = ?block_until(#{?snk_kind := restarted_node}, 15_000), + ct:pal("starting 2"), + {ok, Client1} = emqtt:start_link([ + {port, Port}, + {clientid, ClientId}, + {proto_ver, v5} + ]), + {ok, _} = emqtt:connect(Client1), + ct:pal("subscribing 2"), + {ok, _, [2]} = emqtt:subscribe(Client1, SubTopicFilter, qos2), + SessionIterators = get_session_iterators(Node1, ClientId), + + ok = emqtt:stop(Client1), + + #{session_iterators => SessionIterators} + end, + fun(Res, Trace) -> + ct:pal("trace:\n ~p", [Trace]), + #{session_iterators := SessionIterators} = Res, + %% Exactly one iterator should have been opened. + ?assertEqual(1, map_size(SessionIterators), #{iterators => SessionIterators}), + ?assertMatch(#{SubTopicFilter := _}, SessionIterators), + SubTopicFilterWords = emqx_topic:words(SubTopicFilter), + ?assertEqual([{ClientId, SubTopicFilterWords}], get_all_iterator_refs(Node1)), + ?assertMatch({ok, [_]}, get_all_iterator_ids(Node1)), + ?assertMatch( + {_IsNew = false, ClientId}, + erpc:call(Node1, emqx_ds, session_open, [ClientId]) + ), + ok + end + ), + ok. + +%% Check that we close the iterators before deleting the iterator id entry. +t_session_unsubscription_idempotency(Config) -> + [Node1Spec | _] = ?config(node_specs, Config), + [Node1] = ?config(nodes, Config), + Port = get_mqtt_port(Node1, tcp), + SubTopicFilter = <<"t/+">>, + ClientId = <<"myclientid">>, + ?check_trace( + begin + ?force_ordering( + #{?snk_kind := persistent_session_ds_close_iterators, ?snk_span := {complete, _}}, + _NEvents0 = 1, + #{?snk_kind := will_restart_node}, + _Guard0 = true + ), + ?force_ordering( + #{?snk_kind := restarted_node}, + _NEvents1 = 1, + #{?snk_kind := persistent_session_ds_iterator_delete, ?snk_span := start}, + _Guard1 = true + ), + + spawn_link(fun() -> + ?tp(will_restart_node, #{}), + ct:pal("restarting node ~p", [Node1]), + true = monitor_node(Node1, true), + ok = erpc:call(Node1, init, restart, []), + receive + {nodedown, Node1} -> + ok + after 10_000 -> + ct:fail("node ~p didn't stop", [Node1]) + end, + ct:pal("waiting for nodeup ~p", [Node1]), + wait_nodeup(Node1), + wait_gen_rpc_down(Node1Spec), + ct:pal("restarting apps on ~p", [Node1]), + Apps = maps:get(apps, Node1Spec), + ok = erpc:call(Node1, emqx_cth_suite, load_apps, [Apps]), + _ = erpc:call(Node1, emqx_cth_suite, start_apps, [Apps, Node1Spec]), + %% have to re-inject this so that we may stop the node successfully at the + %% end....
+ ok = emqx_cth_cluster:set_node_opts(Node1, Node1Spec), + ok = snabbkaffe:forward_trace(Node1), + ct:pal("node ~p restarted", [Node1]), + ?tp(restarted_node, #{}), + ok + end), + + ct:pal("starting 1"), + {ok, Client0} = emqtt:start_link([ + {port, Port}, + {clientid, ClientId}, + {proto_ver, v5} + ]), + {ok, _} = emqtt:connect(Client0), + ct:pal("subscribing 1"), + {ok, _, [?RC_GRANTED_QOS_2]} = emqtt:subscribe(Client0, SubTopicFilter, qos2), + ct:pal("unsubscribing 1"), + process_flag(trap_exit, true), + catch emqtt:unsubscribe(Client0, SubTopicFilter), + receive + {'EXIT', {shutdown, _}} -> + ok + after 0 -> ok + end, + process_flag(trap_exit, false), + + {ok, _} = ?block_until(#{?snk_kind := restarted_node}, 15_000), + ct:pal("starting 2"), + {ok, Client1} = emqtt:start_link([ + {port, Port}, + {clientid, ClientId}, + {proto_ver, v5} + ]), + {ok, _} = emqtt:connect(Client1), + ct:pal("subscribing 2"), + {ok, _, [?RC_GRANTED_QOS_2]} = emqtt:subscribe(Client1, SubTopicFilter, qos2), + ct:pal("unsubscribing 2"), + {{ok, _, [?RC_SUCCESS]}, {ok, _}} = + ?wait_async_action( + emqtt:unsubscribe(Client1, SubTopicFilter), + #{ + ?snk_kind := persistent_session_ds_iterator_delete, + ?snk_span := {complete, _} + }, + 15_000 + ), + SessionIterators = get_session_iterators(Node1, ClientId), + + ok = emqtt:stop(Client1), + + #{session_iterators => SessionIterators} + end, + fun(Res, Trace) -> + ct:pal("trace:\n ~p", [Trace]), + #{session_iterators := SessionIterators} = Res, + %% No iterators remaining + ?assertEqual(#{}, SessionIterators), + ?assertEqual([], get_all_iterator_refs(Node1)), + ?assertEqual({ok, []}, get_all_iterator_ids(Node1)), + ok + end + ), + ok. diff --git a/apps/emqx/priv/bpapi.versions b/apps/emqx/priv/bpapi.versions index 6ef46f477..876fe66e0 100644 --- a/apps/emqx/priv/bpapi.versions +++ b/apps/emqx/priv/bpapi.versions @@ -47,6 +47,7 @@ {emqx_node_rebalance_status,1}. {emqx_node_rebalance_status,2}. {emqx_persistent_session,1}. +{emqx_persistent_session_ds,1}. {emqx_plugins,1}. {emqx_prometheus,1}. {emqx_resource,1}. 
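Context for the `bpapi.versions` entry added above: every protocol listed in that file is backed by a `*_proto_v<N>` module declaring the release it first shipped in, and EMQX's BPAPI static checks consult this list. The new `{emqx_persistent_session_ds,1}` entry corresponds to `emqx_persistent_session_ds_proto_v1`, which appears in full later in this diff; the convention boils down to the following (a minimal sketch, body abridged; see the real module below for the RPC wrappers):

-module(emqx_persistent_session_ds_proto_v1).
-behaviour(emqx_bpapi).
-export([introduced_in/0]).
-include_lib("emqx/include/bpapi.hrl").

%% Ties protocol version 1 to the first release that speaks it.
introduced_in() ->
    "5.3.0".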
diff --git a/apps/emqx/rebar.config b/apps/emqx/rebar.config index 730155805..a404e5d81 100644 --- a/apps/emqx/rebar.config +++ b/apps/emqx/rebar.config @@ -28,10 +28,10 @@ {gproc, {git, "https://github.com/emqx/gproc", {tag, "0.9.0.1"}}}, {cowboy, {git, "https://github.com/emqx/cowboy", {tag, "2.9.2"}}}, {esockd, {git, "https://github.com/emqx/esockd", {tag, "5.9.6"}}}, - {ekka, {git, "https://github.com/emqx/ekka", {tag, "0.15.10"}}}, + {ekka, {git, "https://github.com/emqx/ekka", {tag, "0.15.11"}}}, {gen_rpc, {git, "https://github.com/emqx/gen_rpc", {tag, "2.8.1"}}}, {hocon, {git, "https://github.com/emqx/hocon.git", {tag, "0.39.16"}}}, - {emqx_http_lib, {git, "https://github.com/emqx/emqx_http_lib.git", {tag, "0.5.2"}}}, + {emqx_http_lib, {git, "https://github.com/emqx/emqx_http_lib.git", {tag, "0.5.3"}}}, {pbkdf2, {git, "https://github.com/emqx/erlang-pbkdf2.git", {tag, "2.0.4"}}}, {recon, {git, "https://github.com/ferd/recon", {tag, "2.5.1"}}}, {snabbkaffe, {git, "https://github.com/kafka4beam/snabbkaffe.git", {tag, "1.0.8"}}} diff --git a/apps/emqx/src/emqx.app.src b/apps/emqx/src/emqx.app.src index a80d6482a..7a98579df 100644 --- a/apps/emqx/src/emqx.app.src +++ b/apps/emqx/src/emqx.app.src @@ -2,7 +2,7 @@ {application, emqx, [ {id, "emqx"}, {description, "EMQX Core"}, - {vsn, "5.1.8"}, + {vsn, "5.1.9"}, {modules, []}, {registered, []}, {applications, [ diff --git a/apps/emqx/src/emqx_boot.erl b/apps/emqx/src/emqx_boot.erl index 6cbac558f..b3dfcda33 100644 --- a/apps/emqx/src/emqx_boot.erl +++ b/apps/emqx/src/emqx_boot.erl @@ -18,9 +18,9 @@ -export([is_enabled/1]). --define(BOOT_MODULES, [router, broker, listeners]). +-define(BOOT_MODULES, [broker, listeners]). --spec is_enabled(all | router | broker | listeners) -> boolean(). +-spec is_enabled(all | broker | listeners) -> boolean(). is_enabled(Mod) -> (BootMods = boot_modules()) =:= all orelse lists:member(Mod, BootMods). diff --git a/apps/emqx/src/emqx_channel.erl b/apps/emqx/src/emqx_channel.erl index bf1729143..2b20ee709 100644 --- a/apps/emqx/src/emqx_channel.erl +++ b/apps/emqx/src/emqx_channel.erl @@ -122,6 +122,7 @@ -type reply() :: {outgoing, emqx_types:packet()} | {outgoing, [emqx_types:packet()]} + | {connack, emqx_types:packet()} | {event, conn_state() | updated} | {close, Reason :: atom()}. 
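The four `?REPLY_*` macros added to `emqx_channel.hrl` earlier in this diff correspond one-to-one to the variants of the `reply()` type, including the new `{connack, Packet}` form, and the hunks that follow replace the literal tuples with them. A minimal sketch of how a consumer of these replies might dispatch on them (the function and helper names here are hypothetical, for illustration only; the real dispatch lives in the connection layer):

handle_reply(?REPLY_OUTGOING(Packets), State) -> send_packets(Packets, State);  %% serialize & send
handle_reply(?REPLY_CONNACK(Packet), State) -> send_packets([Packet], State);   %% CONNACK goes out like any packet
handle_reply(?REPLY_EVENT(Event), State) -> note_event(Event, State);           %% e.g. connected | disconnected | updated
handle_reply(?REPLY_CLOSE(Reason), State) -> close_connection(Reason, State).

Using the macros at both ends keeps the channel's return values and the connection's pattern matches in sync.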
@@ -1023,7 +1024,7 @@ handle_out(publish, [], Channel) -> {ok, Channel}; handle_out(publish, Publishes, Channel) -> {Packets, NChannel} = do_deliver(Publishes, Channel), - {ok, {outgoing, Packets}, NChannel}; + {ok, ?REPLY_OUTGOING(Packets), NChannel}; handle_out(puback, {PacketId, ReasonCode}, Channel) -> {ok, ?PUBACK_PACKET(PacketId, ReasonCode), Channel}; handle_out(pubrec, {PacketId, ReasonCode}, Channel) -> @@ -1048,7 +1049,7 @@ handle_out(disconnect, {ReasonCode, ReasonName}, Channel) -> handle_out(disconnect, {ReasonCode, ReasonName, #{}}, Channel); handle_out(disconnect, {ReasonCode, ReasonName, Props}, Channel = ?IS_MQTT_V5) -> Packet = ?DISCONNECT_PACKET(ReasonCode, Props), - {ok, [{outgoing, Packet}, {close, ReasonName}], Channel}; + {ok, [?REPLY_OUTGOING(Packet), ?REPLY_CLOSE(ReasonName)], Channel}; handle_out(disconnect, {_ReasonCode, ReasonName, _Props}, Channel) -> {ok, {close, ReasonName}, Channel}; handle_out(auth, {ReasonCode, Properties}, Channel) -> @@ -1062,7 +1063,7 @@ handle_out(Type, Data, Channel) -> %%-------------------------------------------------------------------- return_connack(AckPacket, Channel) -> - Replies = [{event, connected}, {connack, AckPacket}], + Replies = [?REPLY_EVENT(connected), ?REPLY_CONNACK(AckPacket)], case maybe_resume_session(Channel) of ignore -> {ok, Replies, Channel}; @@ -1073,7 +1074,7 @@ return_connack(AckPacket, Channel) -> session = NSession }, {Packets, NChannel2} = do_deliver(Publishes, NChannel1), - Outgoing = [{outgoing, Packets} || length(Packets) > 0], + Outgoing = [?REPLY_OUTGOING(Packets) || length(Packets) > 0], {ok, Replies ++ Outgoing, NChannel2} end. @@ -1121,7 +1122,7 @@ do_deliver(Publishes, Channel) when is_list(Publishes) -> %%-------------------------------------------------------------------- return_sub_unsub_ack(Packet, Channel) -> - {ok, [{outgoing, Packet}, {event, updated}], Channel}. + {ok, [?REPLY_OUTGOING(Packet), ?REPLY_EVENT(updated)], Channel}. %%-------------------------------------------------------------------- %% Handle call @@ -1235,7 +1236,7 @@ handle_info( -> Channel1 = ensure_disconnected(Reason, maybe_publish_will_msg(Channel)), case maybe_shutdown(Reason, Channel1) of - {ok, Channel2} -> {ok, {event, disconnected}, Channel2}; + {ok, Channel2} -> {ok, ?REPLY_EVENT(disconnected), Channel2}; Shutdown -> Shutdown end; handle_info({sock_closed, Reason}, Channel = #channel{conn_state = disconnected}) -> @@ -1252,6 +1253,11 @@ handle_info({disconnect, ReasonCode, ReasonName, Props}, Channel) -> handle_out(disconnect, {ReasonCode, ReasonName, Props}, Channel); handle_info({puback, PacketId, PubRes, RC}, Channel) -> do_finish_publish(PacketId, PubRes, RC, Channel); +handle_info({'DOWN', Ref, process, Pid, Reason}, Channel) -> + case emqx_hooks:run_fold('client.monitored_process_down', [Ref, Pid, Reason], []) of + [] -> {ok, Channel}; + Msgs -> {ok, Msgs, Channel} + end; handle_info(Info, Channel) -> ?SLOG(error, #{msg => "unexpected_info", info => Info}), {ok, Channel}. @@ -1358,9 +1364,13 @@ handle_timeout( {_, Quota2} -> {ok, clean_timer(quota_timer, Channel#channel{quota = Quota2})} end; -handle_timeout(_TRef, Msg, Channel) -> - ?SLOG(error, #{msg => "unexpected_timeout", timeout_msg => Msg}), - {ok, Channel}. +handle_timeout(TRef, Msg, Channel) -> + case emqx_hooks:run_fold('client.timeout', [TRef, Msg], []) of + [] -> + {ok, Channel}; + Msgs -> + {ok, Msgs, Channel} + end. 
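The rewritten fallback clause of `handle_timeout/3` above no longer logs an error for unknown timers; it folds them through the new `'client.timeout'` hook point, which is what lets session implementations and plugins drive their own timers through the channel. A minimal sketch of attaching a callback (the module `my_timers` and the `{emit, Packet}` message shape are assumptions for illustration; `emqx_hooks:run_fold/3` passes the accumulator as the callback's last argument, and returning `{ok, Acc1}` continues the fold):

-module(my_timers).
-export([setup/0, on_timeout/3]).

setup() ->
    %% Priority 100 is arbitrary here; higher-priority callbacks run first.
    emqx_hooks:add('client.timeout', {?MODULE, on_timeout, []}, 100).

%% Matches a timer message we scheduled ourselves; whatever ends up in the
%% accumulator is handed back to the channel as replies.
on_timeout(_TRef, {emit, Packet}, Acc) ->
    {ok, [{outgoing, Packet} | Acc]};
on_timeout(_TRef, _OtherMsg, Acc) ->
    {ok, Acc}.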
%%-------------------------------------------------------------------- %% Ensure timers diff --git a/apps/emqx/src/emqx_cm.erl b/apps/emqx/src/emqx_cm.erl index e3743486c..e3c126629 100644 --- a/apps/emqx/src/emqx_cm.erl +++ b/apps/emqx/src/emqx_cm.erl @@ -21,6 +21,7 @@ -include("emqx.hrl"). -include("emqx_cm.hrl"). +-include("emqx_session.hrl"). -include("logger.hrl"). -include("types.hrl"). -include_lib("snabbkaffe/include/snabbkaffe.hrl"). @@ -188,7 +189,7 @@ do_unregister_channel({_ClientId, ChanPid} = Chan) -> true = ets:delete(?CHAN_CONN_TAB, Chan), true = ets:delete(?CHAN_INFO_TAB, Chan), ets:delete_object(?CHAN_TAB, Chan), - ok = emqx_hooks:run('channel.unregistered', [ChanPid]), + ok = emqx_hooks:run('cm.channel.unregistered', [ChanPid]), true. %% @doc Get info of a channel. @@ -292,7 +293,7 @@ open_session(false, ClientInfo = #{clientid := ClientId}, ConnInfo) -> create_session(ClientInfo, ConnInfo) -> Options = get_session_confs(ClientInfo, ConnInfo), - Session = emqx_session:init(Options), + Session = emqx_session:init_and_open(Options), ok = emqx_metrics:inc('session.created'), ok = emqx_hooks:run('session.created', [ClientInfo, emqx_session:info(Session)]), Session. diff --git a/apps/emqx/src/emqx_frame.erl b/apps/emqx/src/emqx_frame.erl index 8620f834f..20be12c42 100644 --- a/apps/emqx/src/emqx_frame.erl +++ b/apps/emqx/src/emqx_frame.erl @@ -472,8 +472,8 @@ parse_packet( ) -> {Properties, <<>>} = parse_properties(Rest, ?MQTT_PROTO_V5, StrictMode), #mqtt_packet_auth{reason_code = ReasonCode, properties = Properties}; -parse_packet(_Header, _FrameBin, _Options) -> - ?PARSE_ERR(malformed_packet). +parse_packet(Header, _FrameBin, _Options) -> + ?PARSE_ERR(#{hint => malformed_packet, header_type => Header#mqtt_packet_header.type}). parse_will_message( Packet = #mqtt_packet_connect{ @@ -512,8 +512,16 @@ parse_properties(<<0, Rest/binary>>, ?MQTT_PROTO_V5, _StrictMode) -> {#{}, Rest}; parse_properties(Bin, ?MQTT_PROTO_V5, StrictMode) -> {Len, Rest} = parse_variable_byte_integer(Bin), - <<PropsBin:Len/binary, Rest1/binary>> = Rest, - {parse_property(PropsBin, #{}, StrictMode), Rest1}. + case Rest of + <<PropsBin:Len/binary, Rest1/binary>> -> + {parse_property(PropsBin, #{}, StrictMode), Rest1}; + _ -> + ?PARSE_ERR(#{ + hint => user_property_not_enough_bytes, + parsed_key_length => Len, + remaining_bytes_length => byte_size(Rest) + }) + end. parse_property(<<>>, Props, _StrictMode) -> Props; diff --git a/apps/emqx/src/emqx_persistent_session_ds.erl b/apps/emqx/src/emqx_persistent_session_ds.erl index 27b4f0950..83c2375f2 100644 --- a/apps/emqx/src/emqx_persistent_session_ds.erl +++ b/apps/emqx/src/emqx_persistent_session_ds.erl @@ -16,15 +16,33 @@ -module(emqx_persistent_session_ds). +-include_lib("snabbkaffe/include/snabbkaffe.hrl"). + -export([init/0]). --export([persist_message/1]). +-export([ + persist_message/1, + open_session/1, + add_subscription/2, + del_subscription/3 +]). -export([ serialize_message/1, deserialize_message/1 ]). + +%% RPC +-export([ + ensure_iterator_closed_on_all_shards/1, + ensure_all_iterators_closed/1 +]). +-export([ + do_open_iterator/3, + do_ensure_iterator_closed/1, + do_ensure_all_iterators_closed/1 +]). + %% FIXME -define(DS_SHARD, <<"local">>). @@ -72,6 +90,105 @@ store_message(Msg) -> find_subscribers(_Msg) -> [node()]. +open_session(ClientID) -> + ?WHEN_ENABLED(emqx_ds:session_open(ClientID)). + +-spec add_subscription(emqx_types:topic(), emqx_ds:session_id()) -> + {ok, emqx_ds:iterator_id(), IsNew :: boolean()} | {skipped, disabled}.
+add_subscription(TopicFilterBin, DSSessionID) -> + ?WHEN_ENABLED( + begin + TopicFilter = emqx_topic:words(TopicFilterBin), + {ok, IteratorID, StartMS, IsNew} = emqx_ds:session_add_iterator( + DSSessionID, TopicFilter + ), + Ctx = #{ + iterator_id => IteratorID, + start_time => StartMS, + is_new => IsNew + }, + ?tp(persistent_session_ds_iterator_added, Ctx), + ?tp_span( + persistent_session_ds_open_iterators, + Ctx, + ok = open_iterator_on_all_shards(TopicFilter, StartMS, IteratorID) + ), + {ok, IteratorID, IsNew} + end + ). + +-spec open_iterator_on_all_shards(emqx_topic:words(), emqx_ds:time(), emqx_ds:iterator_id()) -> ok. +open_iterator_on_all_shards(TopicFilter, StartMS, IteratorID) -> + ?tp(persistent_session_ds_will_open_iterators, #{ + iterator_id => IteratorID, + start_time => StartMS + }), + %% Note: currently, shards map 1:1 to nodes, but this will change in the future. + Nodes = emqx:running_nodes(), + Results = emqx_persistent_session_ds_proto_v1:open_iterator( + Nodes, TopicFilter, StartMS, IteratorID + ), + %% TODO: handle errors + true = lists:all(fun(Res) -> Res =:= {ok, ok} end, Results), + ok. + +%% RPC target. +-spec do_open_iterator(emqx_topic:words(), emqx_ds:time(), emqx_ds:iterator_id()) -> ok. +do_open_iterator(TopicFilter, StartMS, IteratorID) -> + Replay = {TopicFilter, StartMS}, + {ok, _It} = emqx_ds_storage_layer:ensure_iterator(?DS_SHARD, IteratorID, Replay), + ok. + +-spec del_subscription(emqx_ds:iterator_id() | undefined, emqx_types:topic(), emqx_ds:session_id()) -> + ok | {skipped, disabled}. +del_subscription(IteratorID, TopicFilterBin, DSSessionID) -> + ?WHEN_ENABLED( + begin + TopicFilter = emqx_topic:words(TopicFilterBin), + Ctx = #{iterator_id => IteratorID}, + ?tp_span( + persistent_session_ds_close_iterators, + Ctx, + ok = ensure_iterator_closed_on_all_shards(IteratorID) + ), + ?tp_span( + persistent_session_ds_iterator_delete, + Ctx, + emqx_ds:session_del_iterator(DSSessionID, TopicFilter) + ) + end + ). + +-spec ensure_iterator_closed_on_all_shards(emqx_ds:iterator_id()) -> ok. +ensure_iterator_closed_on_all_shards(IteratorID) -> + %% Note: currently, shards map 1:1 to nodes, but this will change in the future. + Nodes = emqx:running_nodes(), + Results = emqx_persistent_session_ds_proto_v1:close_iterator(Nodes, IteratorID), + %% TODO: handle errors + true = lists:all(fun(Res) -> Res =:= {ok, ok} end, Results), + ok. + +%% RPC target. +-spec do_ensure_iterator_closed(emqx_ds:iterator_id()) -> ok. +do_ensure_iterator_closed(IteratorID) -> + ok = emqx_ds_storage_layer:discard_iterator(?DS_SHARD, IteratorID), + ok. + +-spec ensure_all_iterators_closed(emqx_ds:session_id()) -> ok. +ensure_all_iterators_closed(DSSessionID) -> + %% Note: currently, shards map 1:1 to nodes, but this will change in the future. + Nodes = emqx:running_nodes(), + Results = emqx_persistent_session_ds_proto_v1:close_all_iterators(Nodes, DSSessionID), + %% TODO: handle errors + true = lists:all(fun(Res) -> Res =:= {ok, ok} end, Results), + ok. + +%% RPC target. +-spec do_ensure_all_iterators_closed(emqx_ds:session_id()) -> ok. +do_ensure_all_iterators_closed(DSSessionID) -> + ok = emqx_ds_storage_layer:discard_iterator_prefix(?DS_SHARD, DSSessionID), + ok. + %% serialize_message(Msg) -> diff --git a/apps/emqx/src/emqx_session.erl b/apps/emqx/src/emqx_session.erl index d838e95d0..859cee76b 100644 --- a/apps/emqx/src/emqx_session.erl +++ b/apps/emqx/src/emqx_session.erl @@ -44,6 +44,7 @@ -module(emqx_session). -include("emqx.hrl"). +-include("emqx_session.hrl"). 
-include("emqx_mqtt.hrl"). -include("logger.hrl"). -include("types.hrl"). @@ -59,7 +60,7 @@ unpersist/1 ]). --export([init/1]). +-export([init/1, init_and_open/1]). -export([ info/1, @@ -101,49 +102,13 @@ %% Export for CT -export([set_field/3]). --type sessionID() :: emqx_guid:guid(). +-type session_id() :: emqx_guid:guid(). -export_type([ session/0, - sessionID/0 + session_id/0 ]). --record(session, { - %% Client's id - clientid :: emqx_types:clientid(), - id :: sessionID(), - %% Is this session a persistent session i.e. was it started with Session-Expiry > 0 - is_persistent :: boolean(), - %% Client’s Subscriptions. - subscriptions :: map(), - %% Max subscriptions allowed - max_subscriptions :: non_neg_integer() | infinity, - %% Upgrade QoS? - upgrade_qos :: boolean(), - %% Client <- Broker: QoS1/2 messages sent to the client but - %% have not been unacked. - inflight :: emqx_inflight:inflight(), - %% All QoS1/2 messages published to when client is disconnected, - %% or QoS1/2 messages pending transmission to the Client. - %% - %% Optionally, QoS0 messages pending transmission to the Client. - mqueue :: emqx_mqueue:mqueue(), - %% Next packet id of the session - next_pkt_id = 1 :: emqx_types:packet_id(), - %% Retry interval for redelivering QoS1/2 messages (Unit: millisecond) - retry_interval :: timeout(), - %% Client -> Broker: QoS2 messages received from the client, but - %% have not been completely acknowledged - awaiting_rel :: map(), - %% Maximum number of awaiting QoS2 messages allowed - max_awaiting_rel :: non_neg_integer() | infinity, - %% Awaiting PUBREL Timeout (Unit: millisecond) - await_rel_timeout :: timeout(), - %% Created at - created_at :: pos_integer() - %% Message deliver latency stats -}). - -type inflight_data_phase() :: wait_ack | wait_comp. -record(inflight_data, { @@ -201,6 +166,13 @@ %% Init a Session %%-------------------------------------------------------------------- +-spec init_and_open(options()) -> session(). +init_and_open(Options) -> + #{clientid := ClientID} = Options, + Session0 = emqx_session:init(Options), + _ = emqx_persistent_session_ds:open_session(ClientID), + Session0. + -spec init(options()) -> session(). init(Opts) -> MaxInflight = maps:get(max_inflight, Opts), @@ -297,7 +269,9 @@ info(awaiting_rel_max, #session{max_awaiting_rel = Max}) -> info(await_rel_timeout, #session{await_rel_timeout = Timeout}) -> Timeout; info(created_at, #session{created_at = CreatedAt}) -> - CreatedAt. + CreatedAt; +info(iterators, #session{iterators = Iterators}) -> + Iterators. %% @doc Get stats of the session. -spec stats(session()) -> emqx_types:stats(). @@ -324,11 +298,13 @@ subscribe( case IsNew andalso is_subscriptions_full(Session) of false -> ok = emqx_broker:subscribe(TopicFilter, ClientId, SubOpts), + Session1 = Session#session{subscriptions = maps:put(TopicFilter, SubOpts, Subs)}, + Session2 = add_persistent_subscription(TopicFilter, ClientId, Session1), ok = emqx_hooks:run( 'session.subscribed', [ClientInfo, TopicFilter, SubOpts#{is_new => IsNew}] ), - {ok, Session#session{subscriptions = maps:put(TopicFilter, SubOpts, Subs)}}; + {ok, Session2}; true -> {error, ?RC_QUOTA_EXCEEDED} end. @@ -341,6 +317,17 @@ is_subscriptions_full(#session{ }) -> maps:size(Subs) >= MaxLimit. +-spec add_persistent_subscription(emqx_types:topic(), emqx_types:clientid(), session()) -> + session(). 
+add_persistent_subscription(TopicFilterBin, ClientId, Session) -> + case emqx_persistent_session_ds:add_subscription(TopicFilterBin, ClientId) of + {ok, IteratorId, _IsNew} -> + Iterators = Session#session.iterators, + Session#session{iterators = Iterators#{TopicFilterBin => IteratorId}}; + _ -> + Session + end. + %%-------------------------------------------------------------------- %% Client -> Broker: UNSUBSCRIBE %%-------------------------------------------------------------------- @@ -348,23 +335,37 @@ is_subscriptions_full(#session{ -spec unsubscribe(emqx_types:clientinfo(), emqx_types:topic(), emqx_types:subopts(), session()) -> {ok, session()} | {error, emqx_types:reason_code()}. unsubscribe( - ClientInfo, + ClientInfo = #{clientid := ClientId}, TopicFilter, UnSubOpts, - Session = #session{subscriptions = Subs} + Session0 = #session{subscriptions = Subs} ) -> case maps:find(TopicFilter, Subs) of {ok, SubOpts} -> ok = emqx_broker:unsubscribe(TopicFilter), + Session1 = remove_persistent_subscription(Session0, TopicFilter, ClientId), ok = emqx_hooks:run( 'session.unsubscribed', [ClientInfo, TopicFilter, maps:merge(SubOpts, UnSubOpts)] ), - {ok, Session#session{subscriptions = maps:remove(TopicFilter, Subs)}}; + {ok, Session1#session{subscriptions = maps:remove(TopicFilter, Subs)}}; error -> {error, ?RC_NO_SUBSCRIPTION_EXISTED} end. +-spec remove_persistent_subscription(session(), emqx_types:topic(), emqx_types:clientid()) -> + session(). +remove_persistent_subscription(Session, TopicFilterBin, ClientId) -> + Iterators = Session#session.iterators, + case maps:get(TopicFilterBin, Iterators, undefined) of + undefined -> + ok; + IteratorId -> + _ = emqx_persistent_session_ds:del_subscription(IteratorId, TopicFilterBin, ClientId), + ok + end, + Session#session{iterators = maps:remove(TopicFilterBin, Iterators)}. + %%-------------------------------------------------------------------- %% Client -> Broker: PUBLISH %%-------------------------------------------------------------------- diff --git a/apps/emqx/src/emqx_session_router.erl b/apps/emqx/src/emqx_session_router.erl index 1567f9e62..25484bdf0 100644 --- a/apps/emqx/src/emqx_session_router.erl +++ b/apps/emqx/src/emqx_session_router.erl @@ -21,6 +21,7 @@ -include("emqx.hrl"). -include("logger.hrl"). -include("types.hrl"). +-include("persistent_session/emqx_persistent_session.hrl"). -include_lib("snabbkaffe/include/snabbkaffe.hrl"). diff --git a/apps/emqx/src/emqx_sup.erl b/apps/emqx/src/emqx_sup.erl index 8c79e7482..1893dba86 100644 --- a/apps/emqx/src/emqx_sup.erl +++ b/apps/emqx/src/emqx_sup.erl @@ -74,7 +74,7 @@ init([]) -> Children = [KernelSup] ++ [SessionSup || emqx_persistent_session:is_store_enabled()] ++ - [RouterSup || emqx_boot:is_enabled(router)] ++ + [RouterSup || emqx_boot:is_enabled(broker)] ++ [BrokerSup || emqx_boot:is_enabled(broker)] ++ [CMSup || emqx_boot:is_enabled(broker)] ++ [SysSup, Limiter], diff --git a/apps/emqx/src/emqx_trace/emqx_trace.erl b/apps/emqx/src/emqx_trace/emqx_trace.erl index 9ce6f9d38..4fff45229 100644 --- a/apps/emqx/src/emqx_trace/emqx_trace.erl +++ b/apps/emqx/src/emqx_trace/emqx_trace.erl @@ -160,7 +160,7 @@ create(Trace) -> end; false -> {error, - "The number of traces created has reache the maximum" + "The number of traces created has reached the maximum" " please delete the useless ones first"} end. 
@@ -371,10 +371,16 @@ start_trace(Trace) -> stop_trace(Finished, Started) -> lists:foreach( - fun(#{name := Name, type := Type, filter := Filter}) -> + fun(#{name := Name, id := HandlerID, dst := FilePath, type := Type, filter := Filter}) -> case lists:member(Name, Finished) of true -> - ?TRACE("API", "trace_stopping", #{Type => Filter}), + _ = maybe_sync_logfile(HandlerID), + case file:read_file_info(FilePath) of + {ok, #file_info{size = Size}} when Size > 0 -> + ?TRACE("API", "trace_stopping", #{Type => Filter}); + _ -> + ok + end, emqx_trace_handler:uninstall(Type, Name); false -> ok @@ -383,6 +389,19 @@ stop_trace(Finished, Started) -> Started ). +maybe_sync_logfile(HandlerID) -> + case logger:get_handler_config(HandlerID) of + {ok, #{module := Mod}} -> + case erlang:function_exported(Mod, filesync, 1) of + true -> + Mod:filesync(HandlerID); + false -> + ok + end; + _ -> + ok + end. + clean_stale_trace_files() -> TraceDir = trace_dir(), case file:list_dir(TraceDir) of diff --git a/apps/emqx/src/persistent_session/emqx_persistent_session.erl b/apps/emqx/src/persistent_session/emqx_persistent_session.erl index 111154571..d85e13d67 100644 --- a/apps/emqx/src/persistent_session/emqx_persistent_session.erl +++ b/apps/emqx/src/persistent_session/emqx_persistent_session.erl @@ -115,10 +115,10 @@ storage_backend() -> %% Session message ADT API %%-------------------------------------------------------------------- --spec session_message_info('timestamp' | 'sessionID', sess_msg_key()) -> term(). +-spec session_message_info('timestamp' | 'session_id', sess_msg_key()) -> term(). session_message_info(timestamp, {_, <<>>, <<TS:64>>, ?ABANDONED}) -> TS; session_message_info(timestamp, {_, GUID, _, _}) -> emqx_guid:timestamp(GUID); -session_message_info(sessionID, {SessionID, _, _, _}) -> SessionID. +session_message_info(session_id, {SessionID, _, _, _}) -> SessionID. %%-------------------------------------------------------------------- %% DB API @@ -243,7 +243,7 @@ discard_opt(true, ClientID, Session) -> emqx_session_router:delete_routes(SessionID, Subscriptions), emqx_session:set_field(is_persistent, false, Session). --spec mark_resume_begin(emqx_session:sessionID()) -> emqx_guid:guid(). +-spec mark_resume_begin(emqx_session:session_id()) -> emqx_guid:guid(). mark_resume_begin(SessionID) -> MarkerID = emqx_guid:gen(), put_session_message({SessionID, MarkerID, <<>>, ?MARKER}), @@ -396,12 +396,12 @@ do_mark_as_delivered(SessionID, [{deliver, STopic, Msg} | Left]) -> do_mark_as_delivered(_SessionID, []) -> ok. --spec pending(emqx_session:sessionID()) -> +-spec pending(emqx_session:session_id()) -> [{emqx_types:message(), STopic :: binary()}]. pending(SessionID) -> pending_messages_in_db(SessionID, []). --spec pending(emqx_session:sessionID(), MarkerIDs :: [emqx_guid:guid()]) -> +-spec pending(emqx_session:session_id(), MarkerIDs :: [emqx_guid:guid()]) -> [{emqx_types:message(), STopic :: binary()}]. pending(SessionID, MarkerIds) -> %% TODO: Handle lost MarkerIDs @@ -460,8 +460,8 @@ read_pending_msgs([], Acc) -> lists:reverse(Acc). %% The keys are ordered by -%% {sessionID(), <<>>, bin_timestamp(), ?ABANDONED} For abandoned sessions (clean started or expired). -%% {sessionID(), emqx_guid:guid(), STopic :: binary(), ?DELIVERED | ?UNDELIVERED | ?MARKER} +%% {session_id(), <<>>, bin_timestamp(), ?ABANDONED} For abandoned sessions (clean started or expired).
+%% {session_id(), emqx_guid:guid(), STopic :: binary(), ?DELIVERED | ?UNDELIVERED | ?MARKER} %% where %% <<>> < emqx_guid:guid() %% <<>> < bin_timestamp() @@ -491,7 +491,7 @@ pending_messages({SessionID, PrevMsgId, PrevSTopic, PrevTag} = PrevKey, Acc, Mar false -> pending_messages(Key, Acc, MarkerIds); true -> pending_messages(Key, [{PrevMsgId, PrevSTopic} | Acc], MarkerIds) end; - %% Next sessionID or '$end_of_table' + %% Next session_id or '$end_of_table' _What -> case PrevTag =:= ?UNDELIVERED of false -> {lists:reverse(Acc), MarkerIds}; diff --git a/apps/emqx/src/persistent_session/emqx_persistent_session.hrl b/apps/emqx/src/persistent_session/emqx_persistent_session.hrl index eb4224116..5476d8daf 100644 --- a/apps/emqx/src/persistent_session/emqx_persistent_session.hrl +++ b/apps/emqx/src/persistent_session/emqx_persistent_session.hrl @@ -14,6 +14,8 @@ %% limitations under the License. %%-------------------------------------------------------------------- +-define(PERSISTENT_SESSION_SHARD, emqx_persistent_session_shard). + -record(session_store, { client_id :: binary(), expiry_interval :: non_neg_integer(), diff --git a/apps/emqx/src/persistent_session/emqx_persistent_session_gc.erl b/apps/emqx/src/persistent_session/emqx_persistent_session_gc.erl index a4c4e5422..4aa59cdef 100644 --- a/apps/emqx/src/persistent_session/emqx_persistent_session_gc.erl +++ b/apps/emqx/src/persistent_session/emqx_persistent_session_gc.erl @@ -56,6 +56,7 @@ start_link() -> init([]) -> process_flag(trap_exit, true), + mria_rlog:ensure_shard(?PERSISTENT_SESSION_SHARD), {ok, start_message_gc_timer(start_session_gc_timer(#{}))}. handle_call(_Request, _From, State) -> diff --git a/apps/emqx/src/proto/emqx_persistent_session_ds_proto_v1.erl b/apps/emqx/src/proto/emqx_persistent_session_ds_proto_v1.erl new file mode 100644 index 000000000..b1926098d --- /dev/null +++ b/apps/emqx/src/proto/emqx_persistent_session_ds_proto_v1.erl @@ -0,0 +1,79 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- + +-module(emqx_persistent_session_ds_proto_v1). + +-behaviour(emqx_bpapi). + +-export([ + introduced_in/0, + + open_iterator/4, + close_iterator/2, + close_all_iterators/2 +]). + +-include_lib("emqx/include/bpapi.hrl"). + +-define(TIMEOUT, 30_000). + +introduced_in() -> + %% FIXME + "5.3.0". + +-spec open_iterator( + [node()], + emqx_topic:words(), + emqx_ds:time(), + emqx_ds:iterator_id() +) -> + emqx_rpc:erpc_multicall(ok). +open_iterator(Nodes, TopicFilter, StartMS, IteratorID) -> + erpc:multicall( + Nodes, + emqx_persistent_session_ds, + do_open_iterator, + [TopicFilter, StartMS, IteratorID], + ?TIMEOUT + ). + +-spec close_iterator( + [node()], + emqx_ds:iterator_id() +) -> + emqx_rpc:erpc_multicall(ok). 
+close_iterator(Nodes, IteratorID) -> + erpc:multicall( + Nodes, + emqx_persistent_session_ds, + do_ensure_iterator_closed, + [IteratorID], + ?TIMEOUT + ). + +-spec close_all_iterators( + [node()], + emqx_ds:session_id() +) -> + emqx_rpc:erpc_multicall(ok). +close_all_iterators(Nodes, DSSessionID) -> + erpc:multicall( + Nodes, + emqx_persistent_session_ds, + do_ensure_all_iterators_closed, + [DSSessionID], + ?TIMEOUT + ). diff --git a/apps/emqx/test/emqx_access_control_SUITE.erl b/apps/emqx/test/emqx_access_control_SUITE.erl index 5d4344de6..8f7a1fa6c 100644 --- a/apps/emqx/test/emqx_access_control_SUITE.erl +++ b/apps/emqx/test/emqx_access_control_SUITE.erl @@ -26,7 +26,7 @@ all() -> emqx_common_test_helpers:all(?MODULE). init_per_suite(Config) -> - emqx_common_test_helpers:boot_modules([router, broker]), + emqx_common_test_helpers:boot_modules([broker]), emqx_common_test_helpers:start_apps([]), Config. diff --git a/apps/emqx/test/emqx_boot_SUITE.erl b/apps/emqx/test/emqx_boot_SUITE.erl index 06f08afb8..006888d9c 100644 --- a/apps/emqx/test/emqx_boot_SUITE.erl +++ b/apps/emqx/test/emqx_boot_SUITE.erl @@ -26,19 +26,12 @@ all() -> emqx_common_test_helpers:all(?MODULE). t_is_enabled(_) -> try ok = application:set_env(emqx, boot_modules, all), - ?assert(emqx_boot:is_enabled(router)), ?assert(emqx_boot:is_enabled(broker)), ?assert(emqx_boot:is_enabled(listeners)), - ok = application:set_env(emqx, boot_modules, [router]), - ?assert(emqx_boot:is_enabled(router)), - ?assertNot(emqx_boot:is_enabled(broker)), - ?assertNot(emqx_boot:is_enabled(listeners)), - ok = application:set_env(emqx, boot_modules, [router, broker]), - ?assert(emqx_boot:is_enabled(router)), + ok = application:set_env(emqx, boot_modules, [broker]), ?assert(emqx_boot:is_enabled(broker)), ?assertNot(emqx_boot:is_enabled(listeners)), - ok = application:set_env(emqx, boot_modules, [router, broker, listeners]), - ?assert(emqx_boot:is_enabled(router)), + ok = application:set_env(emqx, boot_modules, [broker, listeners]), ?assert(emqx_boot:is_enabled(broker)), ?assert(emqx_boot:is_enabled(listeners)) after diff --git a/apps/emqx/test/emqx_broker_SUITE.erl b/apps/emqx/test/emqx_broker_SUITE.erl index ba5438641..ca464ee27 100644 --- a/apps/emqx/test/emqx_broker_SUITE.erl +++ b/apps/emqx/test/emqx_broker_SUITE.erl @@ -59,39 +59,54 @@ groups() -> init_per_group(connected_client_count_group, Config) -> Config; init_per_group(tcp, Config) -> - emqx_common_test_helpers:boot_modules(all), - emqx_common_test_helpers:start_apps([]), - [{conn_fun, connect} | Config]; + Apps = emqx_cth_suite:start( + [emqx], + #{work_dir => emqx_cth_suite:work_dir(Config)} + ), + [{conn_fun, connect}, {group_apps, Apps} | Config]; init_per_group(ws, Config) -> - emqx_common_test_helpers:boot_modules(all), - emqx_common_test_helpers:start_apps([]), + Apps = emqx_cth_suite:start( + [emqx], + #{work_dir => emqx_cth_suite:work_dir(Config)} + ), [ {ssl, false}, {enable_websocket, true}, {conn_fun, ws_connect}, {port, 8083}, - {host, "localhost"} + {host, "localhost"}, + {group_apps, Apps} | Config ]; init_per_group(quic, Config) -> - emqx_common_test_helpers:boot_modules(all), - emqx_common_test_helpers:start_apps([]), - UdpPort = 14567, - ok = emqx_common_test_helpers:ensure_quic_listener(?MODULE, UdpPort), + Apps = emqx_cth_suite:start( + [ + {emqx, + "listeners.quic.test {" + "\n enable = true" + "\n max_connections = 1024000" + "\n idle_timeout = 15s" + "\n }"} + ], + #{work_dir => emqx_cth_suite:work_dir(Config)} + ), [ {conn_fun, quic_connect}, - {port, UdpPort} + 
{port, emqx_config:get([listeners, quic, test, bind])},
+        {group_apps, Apps}
         | Config
     ];
 init_per_group(_Group, Config) ->
-    emqx_common_test_helpers:boot_modules(all),
-    emqx_common_test_helpers:start_apps([]),
-    Config.
+    Apps = emqx_cth_suite:start(
+        [emqx],
+        #{work_dir => emqx_cth_suite:work_dir(Config)}
+    ),
+    [{group_apps, Apps} | Config].
 
 end_per_group(connected_client_count_group, _Config) ->
     ok;
-end_per_group(_Group, _Config) ->
-    emqx_common_test_helpers:stop_apps([]).
+end_per_group(_Group, Config) ->
+    emqx_cth_suite:stop(?config(group_apps, Config)).
 
 init_per_suite(Config) ->
     Config.
diff --git a/apps/emqx/test/emqx_connection_SUITE.erl b/apps/emqx/test/emqx_connection_SUITE.erl
index f5e978aeb..01fe3c3db 100644
--- a/apps/emqx/test/emqx_connection_SUITE.erl
+++ b/apps/emqx/test/emqx_connection_SUITE.erl
@@ -49,7 +49,7 @@ init_per_suite(Config) ->
     %% Meck Hooks
     ok = meck:new(emqx_hooks, [passthrough, no_history, no_link]),
     ok = meck:expect(emqx_hooks, run, fun(_Hook, _Args) -> ok end),
-    ok = meck:expect(emqx_hooks, run_fold, fun(_Hook, _Args, Acc) -> {ok, Acc} end),
+    ok = meck:expect(emqx_hooks, run_fold, fun(_Hook, _Args, Acc) -> Acc end),
 
     ok = meck:expect(emqx_channel, ensure_disconnected, fun(_, Channel) -> Channel end),
 
diff --git a/apps/emqx/test/emqx_crl_cache_SUITE.erl b/apps/emqx/test/emqx_crl_cache_SUITE.erl
index 6c6337038..248013ce9 100644
--- a/apps/emqx/test/emqx_crl_cache_SUITE.erl
+++ b/apps/emqx/test/emqx_crl_cache_SUITE.erl
@@ -41,6 +41,7 @@ init_per_suite(Config) ->
     Config.
 
 end_per_suite(_Config) ->
+    emqx_config:erase_all(),
     ok.
 
 init_per_testcase(TestCase, Config) when
diff --git a/apps/emqx/test/emqx_cth_cluster.erl b/apps/emqx/test/emqx_cth_cluster.erl
index e1dc9d977..b41586518 100644
--- a/apps/emqx/test/emqx_cth_cluster.erl
+++ b/apps/emqx/test/emqx_cth_cluster.erl
@@ -14,17 +14,38 @@
 %% limitations under the License.
 %%--------------------------------------------------------------------
 
+%% @doc Common Test Helper / Running tests in a cluster
+%%
+%% This module allows setting up and tearing down clusters of EMQX nodes with
+%% the purpose of running integration tests in a distributed environment, but
+%% with the same isolation measures that `emqx_cth_suite` provides.
+%%
+%% In addition to what `emqx_cth_suite` does with respect to isolation, each
+%% node in the cluster is started with a separate, unique working directory.
+%%
+%% What should be started on each node is defined by the same appspecs that are
+%% used by `emqx_cth_suite` to start applications on the CT node. However, an
+%% additional set of defaults is applied to the appspecs to make sure that the
+%% cluster is started in a consistent, interconnected state, with no conflicts
+%% between applications.
+%%
+%% Most of the time, you just need to:
+%% 1. Describe the cluster with one or more _nodespecs_.
+%% 2. Call `emqx_cth_cluster:start/2` before the testrun (e.g. in `init_per_suite/1`
+%%    or `init_per_group/2`), providing a unique work dir (e.g.
+%%    `emqx_cth_suite:work_dir/1`). Save the result in a context.
+%% 3. Call `emqx_cth_cluster:stop/1` after the testrun concludes (e.g.
+%%    in `end_per_suite/1` or `end_per_group/2`) with the result from step 2.
+%% A minimal sketch of these steps is given right after the exports below.
 -module(emqx_cth_cluster).
 
 -export([start/2]).
--export([stop/1]).
+-export([stop/1, stop_node/1]).
 
 -export([start_bare_node/2]).
 -export([share_load_module/2]).
--export([node_name/1]).
-
--export([node_name/1]).
+-export([node_name/1, mk_nodespecs/2]).
+-export([start_apps/2, set_node_opts/2]).
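+
+%% For instance (an illustrative sketch only; the node names and the plain
+%% `emqx` appspec below are hypothetical, not part of this changeset):
+%%
+%%   init_per_suite(Config) ->
+%%       Nodes = emqx_cth_cluster:start(
+%%           [
+%%               {my_cluster_testnode1, #{role => core, apps => [emqx]}},
+%%               {my_cluster_testnode2, #{role => core, apps => [emqx]}}
+%%           ],
+%%           #{work_dir => emqx_cth_suite:work_dir(Config)}
+%%       ),
+%%       [{cluster_nodes, Nodes} | Config].
+%%
+%%   end_per_suite(Config) ->
+%%       emqx_cth_cluster:stop(?config(cluster_nodes, Config)).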
 -define(APPS_CLUSTERING, [gen_rpc, mria, ekka]).
 
@@ -88,7 +109,7 @@ when
     }.
 start(Nodes, ClusterOpts) ->
     NodeSpecs = mk_nodespecs(Nodes, ClusterOpts),
-    ct:pal("Starting cluster: ~p", [NodeSpecs]),
+    ct:pal("Starting cluster:\n ~p", [NodeSpecs]),
     % 1. Start bare nodes with only basic applications running
     _ = emqx_utils:pmap(fun start_node_init/1, NodeSpecs, ?TIMEOUT_NODE_START_MS),
     % 2. Start applications needed to enable clustering
@@ -289,17 +310,20 @@ load_apps(Node, #{apps := Apps}) ->
     erpc:call(Node, emqx_cth_suite, load_apps, [Apps]).
 
 start_apps_clustering(Node, #{apps := Apps} = Spec) ->
-    SuiteOpts = maps:with([work_dir], Spec),
+    SuiteOpts = suite_opts(Spec),
     AppsClustering = [lists:keyfind(App, 1, Apps) || App <- ?APPS_CLUSTERING],
     _Started = erpc:call(Node, emqx_cth_suite, start, [AppsClustering, SuiteOpts]),
     ok.
 
 start_apps(Node, #{apps := Apps} = Spec) ->
-    SuiteOpts = maps:with([work_dir], Spec),
+    SuiteOpts = suite_opts(Spec),
     AppsRest = [AppSpec || AppSpec = {App, _} <- Apps, not lists:member(App, ?APPS_CLUSTERING)],
     _Started = erpc:call(Node, emqx_cth_suite, start_apps, [AppsRest, SuiteOpts]),
     ok.
 
+suite_opts(Spec) ->
+    maps:with([work_dir], Spec).
+
 maybe_join_cluster(_Node, #{role := replicant}) ->
     ok;
 maybe_join_cluster(Node, Spec) ->
diff --git a/apps/emqx/test/emqx_cth_suite.erl b/apps/emqx/test/emqx_cth_suite.erl
index e5668b77a..dddd096fa 100644
--- a/apps/emqx/test/emqx_cth_suite.erl
+++ b/apps/emqx/test/emqx_cth_suite.erl
@@ -14,6 +14,47 @@
 %% limitations under the License.
 %%--------------------------------------------------------------------
 
+%% @doc Common Test Helper / Running test suites
+%%
+%% The purpose of this module is to run application-level, integration
+%% tests in an isolated fashion.
+%%
+%% Isolation in this context means that each testrun does not leave any
+%% persistent state accessible to following testruns. The goal is to
+%% make testruns completely independent of each other, of the order in
+%% which they are executed, and of the testrun granularity, i.e. whether
+%% they are executed individually or as part of a larger suite. This
+%% should help to increase reproducibility and reduce the risk of false
+%% positives.
+%%
+%% Isolation is achieved through the following measures:
+%% * Each testrun completely terminates and unloads all applications
+%%   started during the testrun.
+%% * Each testrun is executed in a separate directory, usually under
+%%   common_test's private directory, where all persistent state should
+%%   be stored.
+%% * Additionally, each testrun cleans out a few bits of persistent state
+%%   that survive the above measures, namely persistent VM terms related
+%%   to configuration and authentication (see `clean_suite_state/0`).
+%%
+%% Integration test in this context means a test that works with applications
+%% as a whole, and needs to start and stop them as part of the test run.
+%% For this, there's an abstraction called _appspec_ that describes how to
+%% configure and start an application.
+%%
+%% The module also provides a set of default appspecs for some applications
+%% that hide details and quirks of how to start them, to make it easier to
+%% write test suites.
+%%
+%% Most of the time, you just need to:
+%% 1. Describe the appspecs for the applications you want to test.
+%% 2. Call `emqx_cth_suite:start/2` to start the applications before the testrun
+%%    (e.g. in `init_per_suite/1` / `init_per_group/2`), providing the appspecs
+%%    and unique work dir for the testrun (e.g. `work_dir/1`). Save the result
+%%    in a context.
+%% 3. Call `emqx_cth_suite:stop/1` to stop the applications after the testrun
+%%    finishes (e.g. in `end_per_suite/1` / `end_per_group/2`), providing the
+%%    result from step 2.
 -module(emqx_cth_suite).
 
 -include_lib("common_test/include/ct.hrl").
 
@@ -22,6 +63,9 @@
 -export([start/2]).
 -export([stop/1]).
 
+-export([work_dir/1]).
+-export([work_dir/2]).
+
 -export([load_apps/1]).
 -export([start_apps/2]).
 -export([start_app/2]).
@@ -98,7 +142,8 @@ when
     SuiteOpts :: #{
         %% Working directory
         %% Everything a test produces should go here. If this directory is not empty,
-        %% function will raise an error.
+        %% function will raise an error. Most of the time, the result of `work_dir/1`
+        %% or `work_dir/2` (if used in a testcase) should be fine here.
         work_dir := file:name()
     }.
 start(Apps, SuiteOpts = #{work_dir := WorkDir}) ->
@@ -333,6 +378,45 @@ default_config(App, SuiteOpts) ->
 
 %%
 
+%% @doc Determine the unique work directory for the current test run.
+%% Takes into account the name of the test suite, and all test groups the current
+%% run is part of.
+-spec work_dir(CTConfig :: proplists:proplist()) ->
+    file:filename_all().
+work_dir(CTConfig) ->
+    % Directory specific to the current test run.
+    [PrivDir] = proplists:get_all_values(priv_dir, CTConfig),
+    % Directory specific to the currently executing test suite.
+    [DataDir] = proplists:get_all_values(data_dir, CTConfig),
+    % NOTE: Contains the name of the current test group, if executed as part of a group.
+    GroupProps = proplists:get_value(tc_group_properties, CTConfig, []),
+    % NOTE: Contains names of outer test groups, if any.
+    GroupPathOuter = proplists:get_value(tc_group_path, CTConfig, []),
+    SuiteDir = filename:basename(DataDir),
+    GroupPath = lists:append([GroupProps | GroupPathOuter]),
+    GroupLevels = [atom_to_list(Name) || {name, Name} <- GroupPath],
+    WorkDir1 = filename:join(PrivDir, SuiteDir),
+    WorkDir2 =
+        case GroupLevels of
+            [] ->
+                WorkDir1;
+            [_ | _] ->
+                GroupDir = string:join(lists:reverse(GroupLevels), "."),
+                filename:join(WorkDir1, GroupDir)
+        end,
+    WorkDir2.
+
+%% @doc Determine the unique work directory for the current testcase run.
+%% Be careful: if a testcase runs under no groups and its name matches the name of
+%% a previously executed test group, their work directories collide; it's best to
+%% avoid such naming.
+-spec work_dir(TestCaseName :: atom(), CTConfig :: proplists:proplist()) ->
+    file:filename_all().
+work_dir(TCName, CTConfig) ->
+    WorkDir = work_dir(CTConfig),
+    filename:join(WorkDir, TCName).
+
+%%
+
 start_ekka() ->
     ok = emqx_common_test_helpers:start_ekka(),
     {ok, [mnesia, ekka]}.
diff --git a/apps/emqx/test/emqx_flapping_SUITE.erl b/apps/emqx/test/emqx_flapping_SUITE.erl
index 6204d9b6d..021eaddbf 100644
--- a/apps/emqx/test/emqx_flapping_SUITE.erl
+++ b/apps/emqx/test/emqx_flapping_SUITE.erl
@@ -35,7 +35,7 @@ init_per_suite(Config) ->
                 "\n ban_time = 2s"
                 "\n }"}
         ],
-        #{work_dir => ?config(priv_dir, Config)}
+        #{work_dir => emqx_cth_suite:work_dir(Config)}
     ),
     [{suite_apps, Apps} | Config].
 
diff --git a/apps/emqx/test/emqx_persistent_messages_SUITE.erl b/apps/emqx/test/emqx_persistent_messages_SUITE.erl
index b818e3fec..c4f7ef73b 100644
--- a/apps/emqx/test/emqx_persistent_messages_SUITE.erl
+++ b/apps/emqx/test/emqx_persistent_messages_SUITE.erl
@@ -17,32 +17,45 @@
 -module(emqx_persistent_messages_SUITE).
 
 -include_lib("stdlib/include/assert.hrl").
+-include_lib("common_test/include/ct.hrl").
+-include_lib("snabbkaffe/include/snabbkaffe.hrl").
 
 -compile(export_all).
 -compile(nowarn_export_all).
--define(NOW,
-    (calendar:system_time_to_rfc3339(erlang:system_time(millisecond), [{unit, millisecond}]))
-).
+-define(DS_SHARD, <<"local">>).
 
 all() ->
     emqx_common_test_helpers:all(?MODULE).
 
 init_per_suite(Config) ->
-    {ok, _} = application:ensure_all_started(emqx_durable_storage),
-    ok = emqx_common_test_helpers:start_apps([], fun
-        (emqx) ->
-            emqx_common_test_helpers:boot_modules(all),
-            emqx_config:init_load(emqx_schema, <<"persistent_session_store.ds = true">>),
-            emqx_app:set_config_loader(?MODULE);
-        (_) ->
-            ok
-    end),
+    %% avoid inter-suite flakiness...
+    %% TODO: remove after other suites start to use `emqx_cth_suite'
+    application:stop(emqx),
+    application:stop(emqx_durable_storage),
+    TCApps = emqx_cth_suite:start(
+        app_specs(),
+        #{work_dir => emqx_cth_suite:work_dir(Config)}
+    ),
+    [{tc_apps, TCApps} | Config].
+
+end_per_suite(Config) ->
+    TCApps = ?config(tc_apps, Config),
+    emqx_cth_suite:stop(TCApps),
+    ok.
+
+init_per_testcase(t_session_subscription_iterators, Config) ->
+    Cluster = cluster(),
+    Nodes = emqx_cth_cluster:start(Cluster, #{work_dir => ?config(priv_dir, Config)}),
+    [{nodes, Nodes} | Config];
+init_per_testcase(_TestCase, Config) ->
     Config.
 
-end_per_suite(_Config) ->
-    emqx_common_test_helpers:stop_apps([]),
-    application:stop(emqx_durable_storage),
+end_per_testcase(t_session_subscription_iterators, Config) ->
+    Nodes = ?config(nodes, Config),
+    ok = emqx_cth_cluster:stop(Nodes),
+    ok;
+end_per_testcase(_TestCase, _Config) ->
     ok.
 
 t_messages_persisted(_Config) ->
@@ -76,7 +89,7 @@
 
     ct:pal("Results = ~p", [Results]),
 
-    Persisted = consume(<<"local">>, {['#'], 0}),
+    Persisted = consume(?DS_SHARD, {['#'], 0}),
 
     ct:pal("Persisted = ~p", [Persisted]),
 
@@ -88,6 +101,97 @@
 
     ok.
 
+%% TODO: test quic and ws too
+t_session_subscription_iterators(Config) ->
+    [Node1, Node2] = ?config(nodes, Config),
+    Port = get_mqtt_port(Node1, tcp),
+    Topic = <<"t/topic">>,
+    SubTopicFilter = <<"t/+">>,
+    AnotherTopic = <<"u/another-topic">>,
+    ClientId = <<"myclientid">>,
+    ?check_trace(
+        begin
+            [
+                Payload1,
+                Payload2,
+                Payload3,
+                Payload4
+            ] = lists:map(
+                fun(N) -> <<"hello", (integer_to_binary(N))/binary>> end,
+                lists:seq(1, 4)
+            ),
+            ct:pal("starting"),
+            {ok, Client} = emqtt:start_link([
+                {port, Port},
+                {clientid, ClientId},
+                {proto_ver, v5}
+            ]),
+            {ok, _} = emqtt:connect(Client),
+            ct:pal("publishing 1"),
+            Message1 = emqx_message:make(Topic, Payload1),
+            publish(Node1, Message1),
+            ct:pal("subscribing 1"),
+            {ok, _, [2]} = emqtt:subscribe(Client, SubTopicFilter, qos2),
+            ct:pal("publishing 2"),
+            Message2 = emqx_message:make(Topic, Payload2),
+            publish(Node1, Message2),
+            [_] = receive_messages(1),
+            ct:pal("subscribing 2"),
+            {ok, _, [1]} = emqtt:subscribe(Client, SubTopicFilter, qos1),
+            ct:pal("publishing 3"),
+            Message3 = emqx_message:make(Topic, Payload3),
+            publish(Node1, Message3),
+            [_] = receive_messages(1),
+            ct:pal("publishing 4"),
+            Message4 = emqx_message:make(AnotherTopic, Payload4),
+            publish(Node1, Message4),
+            emqtt:stop(Client),
+            #{
+                messages => [Message1, Message2, Message3, Message4]
+            }
+        end,
+        fun(Results, Trace) ->
+            ct:pal("trace:\n ~p", [Trace]),
+            #{
+                messages := [_Message1, Message2, Message3 | _]
+            } = Results,
+            case ?of_kind(ds_session_subscription_added, Trace) of
+                [] ->
+                    %% Since `emqx_durable_storage' is a dependency of `emqx', it gets
+                    %% compiled in "prod" mode when running emqx standalone tests.
+                    ok;
+                [_ | _] ->
+                    ?assertMatch(
+                        [
+                            #{?snk_kind := ds_session_subscription_added},
+                            #{?snk_kind := ds_session_subscription_present}
+                        ],
+                        ?of_kind(
+                            [
+                                ds_session_subscription_added,
+                                ds_session_subscription_present
+                            ],
+                            Trace
+                        )
+                    ),
+                    ok
+            end,
+            ?assertMatch({ok, [_]}, get_all_iterator_ids(Node1)),
+            {ok, [IteratorId]} = get_all_iterator_ids(Node1),
+            ?assertMatch({ok, [IteratorId]}, get_all_iterator_ids(Node2)),
+            ReplayMessages1 = erpc:call(Node1, fun() -> consume(?DS_SHARD, IteratorId) end),
+            ExpectedMessages = [Message2, Message3],
+            %% Note: it is expected that this will break after replayers are in place.
+            %% They might have consumed all the messages by this time.
+            ?assertEqual(ExpectedMessages, ReplayMessages1),
+            %% Different DS shard
+            ReplayMessages2 = erpc:call(Node2, fun() -> consume(?DS_SHARD, IteratorId) end),
+            ?assertEqual([], ReplayMessages2),
+            ok
+        end
+    ),
+    ok.
+
 %%
 
 connect(ClientId, CleanStart, EI) ->
@@ -103,8 +207,11 @@ connect(ClientId, CleanStart, EI) ->
     {ok, _} = emqtt:connect(Client),
     Client.
 
-consume(Shard, Replay) ->
+consume(Shard, Replay = {_TopicFilter, _StartMS}) ->
     {ok, It} = emqx_ds_storage_layer:make_iterator(Shard, Replay),
+    consume(It);
+consume(Shard, IteratorId) when is_binary(IteratorId) ->
+    {ok, It} = emqx_ds_storage_layer:restore_iterator(Shard, IteratorId),
     consume(It).
 
 consume(It) ->
@@ -114,3 +221,54 @@
         none -> []
     end.
+
+receive_messages(Count) ->
+    receive_messages(Count, []).
+
+receive_messages(0, Msgs) ->
+    Msgs;
+receive_messages(Count, Msgs) ->
+    receive
+        {publish, Msg} ->
+            receive_messages(Count - 1, [Msg | Msgs])
+    after 5_000 ->
+        Msgs
+    end.
+
+publish(Node, Message) ->
+    erpc:call(Node, emqx, publish, [Message]).
+
+get_iterator_ids(Node, ClientId) ->
+    Channel = erpc:call(Node, fun() ->
+        [ConnPid] = emqx_cm:lookup_channels(ClientId),
+        sys:get_state(ConnPid)
+    end),
+    emqx_connection:info({channel, {session, iterators}}, Channel).
+
+app_specs() ->
+    [
+        emqx_durable_storage,
+        {emqx, #{
+            config => #{persistent_session_store => #{ds => true}},
+            override_env => [{boot_modules, [broker, listeners]}]
+        }}
+    ].
+
+cluster() ->
+    Node1 = persistent_messages_SUITE1,
+    Spec = #{
+        role => core,
+        join_to => emqx_cth_cluster:node_name(Node1),
+        apps => app_specs()
+    },
+    [
+        {Node1, Spec},
+        {persistent_messages_SUITE2, Spec}
+    ].
+
+get_mqtt_port(Node, Type) ->
+    {_IP, Port} = erpc:call(Node, emqx_config, get, [[listeners, Type, default, bind]]),
+    Port.
+
+get_all_iterator_ids(Node) ->
+    erpc:call(Node, emqx_ds_storage_layer, list_iterator_prefix, [?DS_SHARD, <<>>]).
diff --git a/apps/emqx/test/emqx_proper_types.erl b/apps/emqx/test/emqx_proper_types.erl
index 0e9d3032c..ab1720754 100644
--- a/apps/emqx/test/emqx_proper_types.erl
+++ b/apps/emqx/test/emqx_proper_types.erl
@@ -20,6 +20,7 @@
 -include_lib("proper/include/proper.hrl").
 
 -include("emqx.hrl").
+-include("emqx_session.hrl").
 -include("emqx_access_control.hrl").
%% High level Types @@ -132,33 +133,22 @@ clientinfo() -> sessioninfo() -> ?LET( Session, - {session, clientid(), - % id - sessionid(), - % is_persistent - boolean(), - % subscriptions - subscriptions(), - % max_subscriptions - non_neg_integer(), - % upgrade_qos - boolean(), - % emqx_inflight:inflight() - inflight(), - % emqx_mqueue:mqueue() - mqueue(), - % next_pkt_id - packet_id(), - % retry_interval - safty_timeout(), - % awaiting_rel - awaiting_rel(), - % max_awaiting_rel - non_neg_integer(), - % await_rel_timeout - safty_timeout(), - % created_at - timestamp()}, + #session{ + clientid = clientid(), + id = sessionid(), + is_persistent = boolean(), + subscriptions = subscriptions(), + max_subscriptions = non_neg_integer(), + upgrade_qos = boolean(), + inflight = inflight(), + mqueue = mqueue(), + next_pkt_id = packet_id(), + retry_interval = safty_timeout(), + awaiting_rel = awaiting_rel(), + max_awaiting_rel = non_neg_integer(), + await_rel_timeout = safty_timeout(), + created_at = timestamp() + }, emqx_session:info(Session) ). diff --git a/apps/emqx/test/emqx_router_SUITE.erl b/apps/emqx/test/emqx_router_SUITE.erl index 1128112ff..9729d8ddc 100644 --- a/apps/emqx/test/emqx_router_SUITE.erl +++ b/apps/emqx/test/emqx_router_SUITE.erl @@ -44,7 +44,7 @@ init_per_group(GroupName, Config) -> AppSpecs = [ {emqx, #{ config => mk_config(GroupName), - override_env => [{boot_modules, [router]}] + override_env => [{boot_modules, [broker]}] }} ], Apps = emqx_cth_suite:start(AppSpecs, #{work_dir => WorkDir}), diff --git a/apps/emqx/test/emqx_router_helper_SUITE.erl b/apps/emqx/test/emqx_router_helper_SUITE.erl index 889c8293c..8fe052af8 100644 --- a/apps/emqx/test/emqx_router_helper_SUITE.erl +++ b/apps/emqx/test/emqx_router_helper_SUITE.erl @@ -51,12 +51,12 @@ end_per_group(_GroupName, Config) -> mk_config(routing_schema_v1) -> #{ config => "broker.routing.storage_schema = v1", - override_env => [{boot_modules, [router]}] + override_env => [{boot_modules, [broker]}] }; mk_config(routing_schema_v2) -> #{ config => "broker.routing.storage_schema = v2", - override_env => [{boot_modules, [router]}] + override_env => [{boot_modules, [broker]}] }. init_per_testcase(_TestCase, Config) -> diff --git a/apps/emqx/test/emqx_trace_SUITE.erl b/apps/emqx/test/emqx_trace_SUITE.erl index ce7d7e887..7e932a1d0 100644 --- a/apps/emqx/test/emqx_trace_SUITE.erl +++ b/apps/emqx/test/emqx_trace_SUITE.erl @@ -24,6 +24,7 @@ -include_lib("emqx/include/emqx.hrl"). -include_lib("emqx/include/emqx_trace.hrl"). -include_lib("snabbkaffe/include/snabbkaffe.hrl"). +-include_lib("kernel/include/file.hrl"). %%-------------------------------------------------------------------- %% Setups @@ -52,6 +53,7 @@ init_per_testcase(_, Config) -> Config. end_per_testcase(_) -> + snabbkaffe:stop(), ok. t_base_create_delete(_Config) -> @@ -454,6 +456,36 @@ t_migrate_trace(_Config) -> ), ok. +%% If no relevant event occurred, the log file size must be exactly 0 after stopping the trace. 
+t_empty_trace_log_file(_Config) -> + ?check_trace( + begin + Now = erlang:system_time(second), + Name = <<"empty_trace_log">>, + Trace = [ + {<<"name">>, Name}, + {<<"type">>, clientid}, + {<<"clientid">>, <<"test_trace_no_clientid_1">>}, + {<<"start_at">>, Now}, + {<<"end_at">>, Now + 100} + ], + ?wait_async_action( + ?assertMatch({ok, _}, emqx_trace:create(Trace)), + #{?snk_kind := update_trace_done} + ), + ok = emqx_trace_handler_SUITE:filesync(Name, clientid), + {ok, Filename} = emqx_trace:get_trace_filename(Name), + ?assertMatch({ok, #{size := 0}}, emqx_trace:trace_file_detail(Filename)), + ?wait_async_action( + ?assertEqual(ok, emqx_trace:update(Name, false)), + #{?snk_kind := update_trace_done} + ), + ?assertMatch({ok, #{size := 0}}, emqx_trace:trace_file_detail(Filename)), + ?assertEqual(ok, emqx_trace:delete(Name)) + end, + [] + ). + build_new_trace_data() -> Now = erlang:system_time(second), {ok, _} = emqx_trace:create([ diff --git a/apps/emqx/test/emqx_ws_connection_SUITE.erl b/apps/emqx/test/emqx_ws_connection_SUITE.erl index c494bddb2..3a26afec6 100644 --- a/apps/emqx/test/emqx_ws_connection_SUITE.erl +++ b/apps/emqx/test/emqx_ws_connection_SUITE.erl @@ -539,8 +539,17 @@ t_parse_incoming_order(_) -> t_parse_incoming_frame_error(_) -> {Packets, _St} = ?ws_conn:parse_incoming(<<3, 2, 1, 0>>, [], st()), - FrameError = {frame_error, malformed_packet}, - [{incoming, FrameError}] = Packets. + + ?assertMatch( + [ + {incoming, + {frame_error, #{ + header_type := _, + hint := malformed_packet + }}} + ], + Packets + ). t_handle_incomming_frame_error(_) -> FrameError = {frame_error, bad_qos}, diff --git a/apps/emqx_authn/src/emqx_authn.app.src b/apps/emqx_authn/src/emqx_authn.app.src index ae7bea5da..1050bc496 100644 --- a/apps/emqx_authn/src/emqx_authn.app.src +++ b/apps/emqx_authn/src/emqx_authn.app.src @@ -1,7 +1,7 @@ %% -*- mode: erlang -*- {application, emqx_authn, [ {description, "EMQX Authentication"}, - {vsn, "0.1.25"}, + {vsn, "0.1.26"}, {modules, []}, {registered, [emqx_authn_sup, emqx_authn_registry]}, {applications, [ diff --git a/apps/emqx_authn/src/emqx_authn_password_hashing.erl b/apps/emqx_authn/src/emqx_authn_password_hashing.erl index 4954cd66e..66bc6bfc6 100644 --- a/apps/emqx_authn/src/emqx_authn_password_hashing.erl +++ b/apps/emqx_authn/src/emqx_authn_password_hashing.erl @@ -63,6 +63,9 @@ check_password/4 ]). +-define(SALT_ROUNDS_MIN, 5). +-define(SALT_ROUNDS_MAX, 10). + namespace() -> "authn-hash". roots() -> [pbkdf2, bcrypt, bcrypt_rw, simple]. @@ -71,11 +74,12 @@ fields(bcrypt_rw) -> [ {salt_rounds, sc( - integer(), + range(?SALT_ROUNDS_MIN, ?SALT_ROUNDS_MAX), #{ - default => 10, - example => 10, - desc => "Salt rounds for BCRYPT password generation." + default => ?SALT_ROUNDS_MAX, + example => ?SALT_ROUNDS_MAX, + desc => "Work factor for BCRYPT password generation.", + converter => fun salt_rounds_converter/2 } )} ]; @@ -106,6 +110,13 @@ fields(simple) -> {salt_position, fun salt_position/1} ]. +salt_rounds_converter(undefined, _) -> + undefined; +salt_rounds_converter(I, _) when is_integer(I) -> + emqx_utils:clamp(I, ?SALT_ROUNDS_MIN, ?SALT_ROUNDS_MAX); +salt_rounds_converter(X, _) -> + X. 
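+
+%% Example: with the converter above, a configured `salt_rounds = 12` is
+%% clamped down to 10, and `salt_rounds = 3` is clamped up to 5; only values
+%% in the 5..10 range ever reach the bcrypt work-factor setting.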
+ desc(bcrypt_rw) -> "Settings for bcrypt password hashing algorithm (for DB backends with write capability)."; desc(bcrypt) -> diff --git a/apps/emqx_authz/test/emqx_authz_file_SUITE.erl b/apps/emqx_authz/test/emqx_authz_file_SUITE.erl index 396679783..d31935363 100644 --- a/apps/emqx_authz/test/emqx_authz_file_SUITE.erl +++ b/apps/emqx_authz/test/emqx_authz_file_SUITE.erl @@ -44,7 +44,7 @@ init_per_testcase(TestCase, Config) -> {emqx_conf, "authorization.no_match = deny, authorization.cache.enable = false"}, emqx_authz ], - #{work_dir => filename:join(?config(priv_dir, Config), TestCase)} + #{work_dir => emqx_cth_suite:work_dir(TestCase, Config)} ), [{tc_apps, Apps} | Config]. diff --git a/apps/emqx_authz/test/emqx_authz_rich_actions_SUITE.erl b/apps/emqx_authz/test/emqx_authz_rich_actions_SUITE.erl index 8d24b5472..fc597f15b 100644 --- a/apps/emqx_authz/test/emqx_authz_rich_actions_SUITE.erl +++ b/apps/emqx_authz/test/emqx_authz_rich_actions_SUITE.erl @@ -37,7 +37,7 @@ init_per_testcase(TestCase, Config) -> {emqx_conf, "authorization.no_match = deny, authorization.cache.enable = false"}, emqx_authz ], - #{work_dir => filename:join(?config(priv_dir, Config), TestCase)} + #{work_dir => emqx_cth_suite:work_dir(TestCase, Config)} ), [{tc_apps, Apps} | Config]. diff --git a/apps/emqx_bridge/test/emqx_bridge_api_SUITE.erl b/apps/emqx_bridge/test/emqx_bridge_api_SUITE.erl index 31c6bcab1..f6129c09d 100644 --- a/apps/emqx_bridge/test/emqx_bridge_api_SUITE.erl +++ b/apps/emqx_bridge/test/emqx_bridge_api_SUITE.erl @@ -116,13 +116,13 @@ end_per_suite(_Config) -> ok. init_per_group(cluster = Name, Config) -> - Nodes = [NodePrimary | _] = mk_cluster(Name, Config), + Nodes = [NodePrimary | _] = mk_cluster(Config), init_api([{group, Name}, {cluster_nodes, Nodes}, {node, NodePrimary} | Config]); init_per_group(cluster_later_join = Name, Config) -> - Nodes = [NodePrimary | _] = mk_cluster(Name, Config, #{join_to => undefined}), + Nodes = [NodePrimary | _] = mk_cluster(Config, #{join_to => undefined}), init_api([{group, Name}, {cluster_nodes, Nodes}, {node, NodePrimary} | Config]); -init_per_group(Name, Config) -> - WorkDir = filename:join(?config(priv_dir, Config), Name), +init_per_group(_Name, Config) -> + WorkDir = emqx_cth_suite:work_dir(Config), Apps = emqx_cth_suite:start(?APPSPECS ++ [?APPSPEC_DASHBOARD], #{work_dir => WorkDir}), init_api([{group, single}, {group_apps, Apps}, {node, node()} | Config]). @@ -131,10 +131,10 @@ init_api(Config) -> {ok, App} = erpc:call(APINode, emqx_common_test_http, create_default_app, []), [{api, App} | Config]. -mk_cluster(Name, Config) -> - mk_cluster(Name, Config, #{}). +mk_cluster(Config) -> + mk_cluster(Config, #{}). -mk_cluster(Name, Config, Opts) -> +mk_cluster(Config, Opts) -> Node1Apps = ?APPSPECS ++ [?APPSPEC_DASHBOARD], Node2Apps = ?APPSPECS, emqx_cth_cluster:start( @@ -142,7 +142,7 @@ mk_cluster(Name, Config, Opts) -> {emqx_bridge_api_SUITE1, Opts#{role => core, apps => Node1Apps}}, {emqx_bridge_api_SUITE2, Opts#{role => core, apps => Node2Apps}} ], - #{work_dir => filename:join(?config(priv_dir, Config), Name)} + #{work_dir => emqx_cth_suite:work_dir(Config)} ). end_per_group(Group, Config) when diff --git a/apps/emqx_bridge_azure_event_hub/rebar.config b/apps/emqx_bridge_azure_event_hub/rebar.config index 85c39ce01..dbcc8269c 100644 --- a/apps/emqx_bridge_azure_event_hub/rebar.config +++ b/apps/emqx_bridge_azure_event_hub/rebar.config @@ -1,6 +1,6 @@ %% -*- mode: erlang; -*- {erl_opts, [debug_info]}. 
-{deps, [ {wolff, {git, "https://github.com/kafka4beam/wolff.git", {tag, "1.7.6"}}} +{deps, [ {wolff, {git, "https://github.com/kafka4beam/wolff.git", {tag, "1.7.7"}}} , {kafka_protocol, {git, "https://github.com/kafka4beam/kafka_protocol.git", {tag, "4.1.3"}}} , {brod_gssapi, {git, "https://github.com/kafka4beam/brod_gssapi.git", {tag, "v0.1.0"}}} , {brod, {git, "https://github.com/kafka4beam/brod.git", {tag, "3.16.8"}}} diff --git a/apps/emqx_bridge_cassandra/src/emqx_bridge_cassandra.app.src b/apps/emqx_bridge_cassandra/src/emqx_bridge_cassandra.app.src index de790ab46..26028e8ab 100644 --- a/apps/emqx_bridge_cassandra/src/emqx_bridge_cassandra.app.src +++ b/apps/emqx_bridge_cassandra/src/emqx_bridge_cassandra.app.src @@ -1,6 +1,6 @@ {application, emqx_bridge_cassandra, [ {description, "EMQX Enterprise Cassandra Bridge"}, - {vsn, "0.1.3"}, + {vsn, "0.1.4"}, {registered, []}, {applications, [ kernel, diff --git a/apps/emqx_bridge_cassandra/src/emqx_bridge_cassandra_connector.erl b/apps/emqx_bridge_cassandra/src/emqx_bridge_cassandra_connector.erl index 2cbf0d6fe..0610ee743 100644 --- a/apps/emqx_bridge_cassandra/src/emqx_bridge_cassandra_connector.erl +++ b/apps/emqx_bridge_cassandra/src/emqx_bridge_cassandra_connector.erl @@ -94,7 +94,6 @@ on_start( #{ servers := Servers0, keyspace := Keyspace, - username := Username, pool_size := PoolSize, ssl := SSL } = Config @@ -114,12 +113,12 @@ on_start( Options = [ {nodes, Servers}, - {username, Username}, - {password, emqx_secret:wrap(maps:get(password, Config, ""))}, {keyspace, Keyspace}, {auto_reconnect, ?AUTO_RECONNECT_INTERVAL}, {pool_size, PoolSize} ], + Options1 = maybe_add_opt(username, Config, Options), + Options2 = maybe_add_opt(password, Config, Options1, _IsSensitive = true), SslOpts = case maps:get(enable, SSL) of @@ -132,7 +131,7 @@ on_start( [] end, State = parse_prepare_cql(Config), - case emqx_resource_pool:start(InstId, ?MODULE, Options ++ SslOpts) of + case emqx_resource_pool:start(InstId, ?MODULE, Options2 ++ SslOpts) of ok -> {ok, init_prepare(State#{pool_name => InstId, prepare_statement => #{}})}; {error, Reason} -> @@ -513,3 +512,19 @@ maybe_assign_type(V) when is_integer(V) -> maybe_assign_type(V) when is_float(V) -> {double, V}; maybe_assign_type(V) -> V. + +maybe_add_opt(Key, Conf, Opts) -> + maybe_add_opt(Key, Conf, Opts, _IsSensitive = false). + +maybe_add_opt(Key, Conf, Opts, IsSensitive) -> + case Conf of + #{Key := Val} -> + [{Key, maybe_wrap(IsSensitive, Val)} | Opts]; + _ -> + Opts + end. + +maybe_wrap(false = _IsSensitive, Val) -> + Val; +maybe_wrap(true, Val) -> + emqx_secret:wrap(Val). diff --git a/apps/emqx_bridge_cassandra/test/emqx_bridge_cassandra_connector_SUITE.erl b/apps/emqx_bridge_cassandra/test/emqx_bridge_cassandra_connector_SUITE.erl index bceae1fd2..fcd482b47 100644 --- a/apps/emqx_bridge_cassandra/test/emqx_bridge_cassandra_connector_SUITE.erl +++ b/apps/emqx_bridge_cassandra/test/emqx_bridge_cassandra_connector_SUITE.erl @@ -7,15 +7,17 @@ -compile(nowarn_export_all). -compile(export_all). +-include_lib("common_test/include/ct.hrl"). -include("emqx_bridge_cassandra.hrl"). -include("emqx_connector/include/emqx_connector.hrl"). -include_lib("eunit/include/eunit.hrl"). -include_lib("emqx/include/emqx.hrl"). -include_lib("stdlib/include/assert.hrl"). 
-%% Cassandra server defined at `.ci/docker-compose-file/docker-compose-cassandra-tcp.yaml` +%% Cassandra servers are defined at `.ci/docker-compose-file/docker-compose-cassandra.yaml` %% You can change it to `127.0.0.1`, if you run this SUITE locally -define(CASSANDRA_HOST, "cassandra"). +-define(CASSANDRA_HOST_NOAUTH, "cassandra_noauth"). -define(CASSANDRA_RESOURCE_MOD, emqx_bridge_cassandra_connector). %% This test SUITE requires a running cassandra instance. If you don't want to @@ -32,40 +34,58 @@ -define(CASSA_PASSWORD, <<"cassandra">>). all() -> - emqx_common_test_helpers:all(?MODULE). + [ + {group, auth}, + {group, noauth} + ]. groups() -> - []. + TCs = emqx_common_test_helpers:all(?MODULE), + [ + {auth, TCs}, + {noauth, TCs} + ]. -cassandra_servers() -> +cassandra_servers(CassandraHost) -> lists:map( fun(#{hostname := Host, port := Port}) -> {Host, Port} end, emqx_schema:parse_servers( - iolist_to_binary([?CASSANDRA_HOST, ":", erlang:integer_to_list(?CASSANDRA_DEFAULT_PORT)]), + iolist_to_binary([CassandraHost, ":", erlang:integer_to_list(?CASSANDRA_DEFAULT_PORT)]), #{default_port => ?CASSANDRA_DEFAULT_PORT} ) ). init_per_suite(Config) -> - case - emqx_common_test_helpers:is_tcp_server_available(?CASSANDRA_HOST, ?CASSANDRA_DEFAULT_PORT) - of + ok = emqx_common_test_helpers:start_apps([emqx_conf]), + ok = emqx_connector_test_helpers:start_apps([emqx_resource]), + {ok, _} = application:ensure_all_started(emqx_connector), + Config. + +init_per_group(Group, Config) -> + {CassandraHost, AuthOpts} = + case Group of + auth -> + {?CASSANDRA_HOST, [{username, ?CASSA_USERNAME}, {password, ?CASSA_PASSWORD}]}; + noauth -> + {?CASSANDRA_HOST_NOAUTH, []} + end, + case emqx_common_test_helpers:is_tcp_server_available(CassandraHost, ?CASSANDRA_DEFAULT_PORT) of true -> - ok = emqx_common_test_helpers:start_apps([emqx_conf]), - ok = emqx_connector_test_helpers:start_apps([emqx_resource]), - {ok, _} = application:ensure_all_started(emqx_connector), %% keyspace `mqtt` must be created in advance {ok, Conn} = ecql:connect([ - {nodes, cassandra_servers()}, - {username, ?CASSA_USERNAME}, - {password, ?CASSA_PASSWORD}, + {nodes, cassandra_servers(CassandraHost)}, {keyspace, "mqtt"} + | AuthOpts ]), ecql:close(Conn), - Config; + [ + {cassa_host, CassandraHost}, + {cassa_auth_opts, AuthOpts} + | Config + ]; false -> case os:getenv("IS_CI") of "yes" -> @@ -75,6 +95,9 @@ init_per_suite(Config) -> end end. +end_per_group(_Group, _Config) -> + ok. + end_per_suite(_Config) -> ok = emqx_common_test_helpers:stop_apps([emqx_conf]), ok = emqx_connector_test_helpers:stop_apps([emqx_resource]), @@ -90,10 +113,10 @@ end_per_testcase(_, _Config) -> %% cases %%-------------------------------------------------------------------- -t_lifecycle(_Config) -> +t_lifecycle(Config) -> perform_lifecycle_check( <<"emqx_connector_cassandra_SUITE">>, - cassandra_config() + cassandra_config(Config) ). show(X) -> @@ -168,25 +191,25 @@ perform_lifecycle_check(ResourceId, InitialConfig) -> %% utils %%-------------------------------------------------------------------- -cassandra_config() -> - Config = - #{ +cassandra_config(Config) -> + Host = ?config(cassa_host, Config), + AuthOpts = maps:from_list(?config(cassa_auth_opts, Config)), + CassConfig = + AuthOpts#{ auto_reconnect => true, keyspace => <<"mqtt">>, - username => ?CASSA_USERNAME, - password => ?CASSA_PASSWORD, pool_size => 8, servers => iolist_to_binary( io_lib:format( "~s:~b", [ - ?CASSANDRA_HOST, + Host, ?CASSANDRA_DEFAULT_PORT ] ) ) }, - #{<<"config">> => Config}. 
+ #{<<"config">> => CassConfig}. test_query_no_params() -> {query, <<"SELECT count(1) AS T FROM system.local">>}. diff --git a/apps/emqx_bridge_gcp_pubsub/src/emqx_bridge_gcp_pubsub.app.src b/apps/emqx_bridge_gcp_pubsub/src/emqx_bridge_gcp_pubsub.app.src index c7dcea5c0..9afc0f05e 100644 --- a/apps/emqx_bridge_gcp_pubsub/src/emqx_bridge_gcp_pubsub.app.src +++ b/apps/emqx_bridge_gcp_pubsub/src/emqx_bridge_gcp_pubsub.app.src @@ -1,6 +1,6 @@ {application, emqx_bridge_gcp_pubsub, [ {description, "EMQX Enterprise GCP Pub/Sub Bridge"}, - {vsn, "0.1.7"}, + {vsn, "0.1.8"}, {registered, []}, {applications, [ kernel, diff --git a/apps/emqx_bridge_gcp_pubsub/test/emqx_bridge_gcp_pubsub_consumer_SUITE.erl b/apps/emqx_bridge_gcp_pubsub/test/emqx_bridge_gcp_pubsub_consumer_SUITE.erl index 8dc6cd7c4..60c54ebda 100644 --- a/apps/emqx_bridge_gcp_pubsub/test/emqx_bridge_gcp_pubsub_consumer_SUITE.erl +++ b/apps/emqx_bridge_gcp_pubsub/test/emqx_bridge_gcp_pubsub_consumer_SUITE.erl @@ -577,7 +577,7 @@ cluster(Config) -> {schema_mod, emqx_enterprise_schema}, {env_handler, fun (emqx) -> - application:set_env(emqx, boot_modules, [broker, router]), + application:set_env(emqx, boot_modules, [broker]), ok; (emqx_conf) -> ok; diff --git a/apps/emqx_bridge_kafka/rebar.config b/apps/emqx_bridge_kafka/rebar.config index 945ccbdba..8246fa8cf 100644 --- a/apps/emqx_bridge_kafka/rebar.config +++ b/apps/emqx_bridge_kafka/rebar.config @@ -1,6 +1,6 @@ %% -*- mode: erlang; -*- {erl_opts, [debug_info]}. -{deps, [ {wolff, {git, "https://github.com/kafka4beam/wolff.git", {tag, "1.7.6"}}} +{deps, [ {wolff, {git, "https://github.com/kafka4beam/wolff.git", {tag, "1.7.7"}}} , {kafka_protocol, {git, "https://github.com/kafka4beam/kafka_protocol.git", {tag, "4.1.3"}}} , {brod_gssapi, {git, "https://github.com/kafka4beam/brod_gssapi.git", {tag, "v0.1.0"}}} , {brod, {git, "https://github.com/kafka4beam/brod.git", {tag, "3.16.8"}}} diff --git a/apps/emqx_bridge_kafka/src/emqx_bridge_kafka.app.src b/apps/emqx_bridge_kafka/src/emqx_bridge_kafka.app.src index 55b02560b..835932ddb 100644 --- a/apps/emqx_bridge_kafka/src/emqx_bridge_kafka.app.src +++ b/apps/emqx_bridge_kafka/src/emqx_bridge_kafka.app.src @@ -1,7 +1,7 @@ %% -*- mode: erlang -*- {application, emqx_bridge_kafka, [ {description, "EMQX Enterprise Kafka Bridge"}, - {vsn, "0.1.8"}, + {vsn, "0.1.9"}, {registered, [emqx_bridge_kafka_consumer_sup]}, {applications, [ kernel, diff --git a/apps/emqx_bridge_kafka/test/emqx_bridge_kafka_impl_consumer_SUITE.erl b/apps/emqx_bridge_kafka/test/emqx_bridge_kafka_impl_consumer_SUITE.erl index 1691fa6a4..60a571b2d 100644 --- a/apps/emqx_bridge_kafka/test/emqx_bridge_kafka_impl_consumer_SUITE.erl +++ b/apps/emqx_bridge_kafka/test/emqx_bridge_kafka_impl_consumer_SUITE.erl @@ -1101,7 +1101,7 @@ cluster(Config) -> {load_apps, [emqx_machine]}, {env_handler, fun (emqx) -> - application:set_env(emqx, boot_modules, [broker, router]), + application:set_env(emqx, boot_modules, [broker]), ExtraEnvHandlerHook(), ok; (emqx_conf) -> diff --git a/apps/emqx_bridge_kinesis/src/emqx_bridge_kinesis.app.src b/apps/emqx_bridge_kinesis/src/emqx_bridge_kinesis.app.src index 3eb923b5d..6066e2495 100644 --- a/apps/emqx_bridge_kinesis/src/emqx_bridge_kinesis.app.src +++ b/apps/emqx_bridge_kinesis/src/emqx_bridge_kinesis.app.src @@ -1,6 +1,6 @@ {application, emqx_bridge_kinesis, [ {description, "EMQX Enterprise Amazon Kinesis Bridge"}, - {vsn, "0.1.1"}, + {vsn, "0.1.2"}, {registered, []}, {applications, [ kernel, diff --git 
a/apps/emqx_bridge_pulsar/test/emqx_bridge_pulsar_impl_producer_SUITE.erl b/apps/emqx_bridge_pulsar/test/emqx_bridge_pulsar_impl_producer_SUITE.erl index fb358906f..44d28c31a 100644 --- a/apps/emqx_bridge_pulsar/test/emqx_bridge_pulsar_impl_producer_SUITE.erl +++ b/apps/emqx_bridge_pulsar/test/emqx_bridge_pulsar_impl_producer_SUITE.erl @@ -536,7 +536,7 @@ cluster(Config) -> {schema_mod, emqx_enterprise_schema}, {env_handler, fun (emqx) -> - application:set_env(emqx, boot_modules, [broker, router]), + application:set_env(emqx, boot_modules, [broker]), ok; (emqx_conf) -> ok; diff --git a/apps/emqx_bridge_sqlserver/src/emqx_bridge_sqlserver.app.src b/apps/emqx_bridge_sqlserver/src/emqx_bridge_sqlserver.app.src index 3aa8b3b68..530578d73 100644 --- a/apps/emqx_bridge_sqlserver/src/emqx_bridge_sqlserver.app.src +++ b/apps/emqx_bridge_sqlserver/src/emqx_bridge_sqlserver.app.src @@ -1,6 +1,6 @@ {application, emqx_bridge_sqlserver, [ {description, "EMQX Enterprise SQL Server Bridge"}, - {vsn, "0.1.2"}, + {vsn, "0.1.3"}, {registered, []}, {applications, [kernel, stdlib, emqx_resource, odbc]}, {env, []}, diff --git a/apps/emqx_bridge_sqlserver/src/emqx_bridge_sqlserver_connector.erl b/apps/emqx_bridge_sqlserver/src/emqx_bridge_sqlserver_connector.erl index b6ad15ab9..3e1abbeba 100644 --- a/apps/emqx_bridge_sqlserver/src/emqx_bridge_sqlserver_connector.erl +++ b/apps/emqx_bridge_sqlserver/src/emqx_bridge_sqlserver_connector.erl @@ -173,7 +173,6 @@ on_start( #{ server := Server, username := Username, - password := Password, driver := Driver, database := Database, pool_size := PoolSize, @@ -200,7 +199,7 @@ on_start( Options = [ {server, to_bin(Server)}, {username, Username}, - {password, Password}, + {password, emqx_secret:wrap(maps:get(password, Config, ""))}, {driver, Driver}, {database, Database}, {pool_size, PoolSize} @@ -320,7 +319,7 @@ conn_str([{database, Database} | Opts], Acc) -> conn_str([{username, Username} | Opts], Acc) -> conn_str(Opts, ["UID=" ++ str(Username) | Acc]); conn_str([{password, Password} | Opts], Acc) -> - conn_str(Opts, ["PWD=" ++ str(Password) | Acc]); + conn_str(Opts, ["PWD=" ++ str(emqx_secret:unwrap(Password)) | Acc]); conn_str([{_, _} | Opts], Acc) -> conn_str(Opts, Acc). diff --git a/apps/emqx_connector/src/emqx_connector.app.src b/apps/emqx_connector/src/emqx_connector.app.src index 397cd0093..b43122b36 100644 --- a/apps/emqx_connector/src/emqx_connector.app.src +++ b/apps/emqx_connector/src/emqx_connector.app.src @@ -1,7 +1,7 @@ %% -*- mode: erlang -*- {application, emqx_connector, [ {description, "EMQX Data Integration Connectors"}, - {vsn, "0.1.30"}, + {vsn, "0.1.31"}, {registered, []}, {mod, {emqx_connector_app, []}}, {applications, [ diff --git a/apps/emqx_connector/src/emqx_connector_pgsql.erl b/apps/emqx_connector/src/emqx_connector_pgsql.erl index 04ba4fd51..5cc25bfa1 100644 --- a/apps/emqx_connector/src/emqx_connector_pgsql.erl +++ b/apps/emqx_connector/src/emqx_connector_pgsql.erl @@ -85,13 +85,10 @@ server() -> adjust_fields(Fields) -> lists:map( fun - ({username, OrigUsernameFn}) -> - {username, fun - (required) -> - true; - (Any) -> - OrigUsernameFn(Any) - end}; + ({username, Sc}) -> + %% to please dialyzer... 
+ Override = #{type => hocon_schema:field_schema(Sc, type), required => true}, + {username, hocon_schema:override(Sc, Override)}; (Field) -> Field end, diff --git a/apps/emqx_ctl/src/emqx_ctl.app.src b/apps/emqx_ctl/src/emqx_ctl.app.src index 1196f17a5..c3a55ef7b 100644 --- a/apps/emqx_ctl/src/emqx_ctl.app.src +++ b/apps/emqx_ctl/src/emqx_ctl.app.src @@ -1,6 +1,6 @@ {application, emqx_ctl, [ {description, "Backend for emqx_ctl script"}, - {vsn, "0.1.2"}, + {vsn, "0.1.3"}, {registered, []}, {mod, {emqx_ctl_app, []}}, {applications, [ diff --git a/apps/emqx_ctl/src/emqx_ctl.erl b/apps/emqx_ctl/src/emqx_ctl.erl index d1a7ed1d7..a51f4919c 100644 --- a/apps/emqx_ctl/src/emqx_ctl.erl +++ b/apps/emqx_ctl/src/emqx_ctl.erl @@ -119,8 +119,7 @@ run_command(Cmd, Args) when is_atom(Cmd) -> case lookup_command(Cmd) of [{Mod, Fun}] -> try - _ = apply(Mod, Fun, [Args]), - ok + apply(Mod, Fun, [Args]) catch _:Reason:Stacktrace -> ?LOG_ERROR(#{ diff --git a/apps/emqx_durable_storage/src/emqx_ds.erl b/apps/emqx_durable_storage/src/emqx_ds.erl index 9eccf8c16..0a61cad43 100644 --- a/apps/emqx_durable_storage/src/emqx_ds.erl +++ b/apps/emqx_durable_storage/src/emqx_ds.erl @@ -15,6 +15,9 @@ %%-------------------------------------------------------------------- -module(emqx_ds). +-include_lib("stdlib/include/ms_transform.hrl"). +-include_lib("snabbkaffe/include/snabbkaffe.hrl"). + %% API: -export([ensure_shard/2]). %% Messages: @@ -27,6 +30,7 @@ session_drop/1, session_suspend/1, session_add_iterator/2, + session_get_iterator_id/2, session_del_iterator/2, session_stats/0 ]). @@ -39,6 +43,8 @@ message_stats/0, message_store_opts/0, session_id/0, + replay/0, + replay_id/0, iterator_id/0, iterator/0, shard/0, @@ -52,11 +58,13 @@ %% Type declarations %%================================================================================ --type session_id() :: emqx_types:clientid(). +%% Currently, this is the clientid. We avoid `emqx_types:clientid()' because that can be +%% an atom, in theory (?). +-type session_id() :: binary(). -type iterator() :: term(). --opaque iterator_id() :: binary(). +-type iterator_id() :: binary(). %%-type session() :: #session{}. @@ -73,9 +81,17 @@ %% Timestamp %% Earliest possible timestamp is 0. -%% TODO granularity? +%% TODO granularity? Currently, we should always use micro second, as that's the unit we +%% use in emqx_guid. Otherwise, the iterators won't match the message timestamps. -type time() :: non_neg_integer(). +-type replay_id() :: binary(). + +-type replay() :: { + _TopicFilter :: emqx_topic:words(), + _StartTime :: time() +}. + %%================================================================================ %% API funcions %%================================================================================ @@ -121,23 +137,20 @@ message_stats() -> %% %% Note: session API doesn't handle session takeovers, it's the job of %% the broker. --spec session_open(emqx_types:clientid()) -> {_New :: boolean(), session_id(), [iterator_id()]}. +-spec session_open(emqx_types:clientid()) -> {_New :: boolean(), session_id()}. 
session_open(ClientID) -> - {atomic, Ret} = - mria:transaction( - ?DS_SHARD, - fun() -> - case mnesia:read(?SESSION_TAB, ClientID) of - [#session{iterators = Iterators}] -> - {false, ClientID, Iterators}; - [] -> - Session = #session{id = ClientID, iterators = []}, - mnesia:write(?SESSION_TAB, Session, write), - {true, ClientID, []} - end + {atomic, Res} = + mria:transaction(?DS_SHARD, fun() -> + case mnesia:read(?SESSION_TAB, ClientID, write) of + [#session{}] -> + {false, ClientID}; + [] -> + Session = #session{id = ClientID}, + mnesia:write(?SESSION_TAB, Session, write), + {true, ClientID} end - ), - Ret. + end), + Res. %% @doc Called when a client reconnects with `clean session=true' or %% during session GC @@ -146,6 +159,7 @@ session_drop(ClientID) -> {atomic, ok} = mria:transaction( ?DS_SHARD, fun() -> + %% TODO: ensure all iterators from this clientid are closed? mnesia:delete({?SESSION_TAB, ClientID}) end ), @@ -160,19 +174,57 @@ session_suspend(_SessionId) -> %% @doc Called when a client subscribes to a topic. Idempotent. -spec session_add_iterator(session_id(), emqx_topic:words()) -> - {ok, iterator_id()} | {error, session_not_found}. -session_add_iterator(_SessionId, _TopicFilter) -> - %% TODO - {ok, <<"">>}. + {ok, iterator_id(), time(), _IsNew :: boolean()}. +session_add_iterator(DSSessionId, TopicFilter) -> + IteratorRefId = {DSSessionId, TopicFilter}, + {atomic, Res} = + mria:transaction(?DS_SHARD, fun() -> + case mnesia:read(?ITERATOR_REF_TAB, IteratorRefId, write) of + [] -> + {IteratorId, StartMS} = new_iterator_id(DSSessionId), + IteratorRef = #iterator_ref{ + ref_id = IteratorRefId, + it_id = IteratorId, + start_time = StartMS + }, + ok = mnesia:write(?ITERATOR_REF_TAB, IteratorRef, write), + ?tp( + ds_session_subscription_added, + #{iterator_id => IteratorId, session_id => DSSessionId} + ), + IsNew = true, + {ok, IteratorId, StartMS, IsNew}; + [#iterator_ref{it_id = IteratorId, start_time = StartMS}] -> + ?tp( + ds_session_subscription_present, + #{iterator_id => IteratorId, session_id => DSSessionId} + ), + IsNew = false, + {ok, IteratorId, StartMS, IsNew} + end + end), + Res. -%% @doc Called when a client unsubscribes from a topic. Returns `true' -%% if the session contained the subscription or `false' if it wasn't -%% subscribed. --spec session_del_iterator(session_id(), emqx_topic:words()) -> - {ok, boolean()} | {error, session_not_found}. -session_del_iterator(_SessionId, _TopicFilter) -> - %% TODO - {ok, false}. +-spec session_get_iterator_id(session_id(), emqx_topic:words()) -> + {ok, iterator_id()} | {error, not_found}. +session_get_iterator_id(DSSessionId, TopicFilter) -> + IteratorRefId = {DSSessionId, TopicFilter}, + case mnesia:dirty_read(?ITERATOR_REF_TAB, IteratorRefId) of + [] -> + {error, not_found}; + [#iterator_ref{it_id = IteratorId}] -> + {ok, IteratorId} + end. + +%% @doc Called when a client unsubscribes from a topic. +-spec session_del_iterator(session_id(), emqx_topic:words()) -> ok. +session_del_iterator(DSSessionId, TopicFilter) -> + IteratorRefId = {DSSessionId, TopicFilter}, + {atomic, ok} = + mria:transaction(?DS_SHARD, fun() -> + mnesia:delete(?ITERATOR_REF_TAB, IteratorRefId, write) + end), + ok. -spec session_stats() -> #{}. session_stats() -> @@ -201,3 +253,9 @@ iterator_stats() -> %%================================================================================ %% Internal functions %%================================================================================ + +-spec new_iterator_id(session_id()) -> {iterator_id(), time()}. 
+new_iterator_id(DSSessionId) ->
+    NowMS = erlang:system_time(microsecond),
+    IteratorId = <<DSSessionId/binary, (integer_to_binary(NowMS))/binary>>,
+    {IteratorId, NowMS}.
diff --git a/apps/emqx_durable_storage/src/emqx_ds_app.erl b/apps/emqx_durable_storage/src/emqx_ds_app.erl
index 216e979ee..cbcdb0b8c 100644
--- a/apps/emqx_durable_storage/src/emqx_ds_app.erl
+++ b/apps/emqx_durable_storage/src/emqx_ds_app.erl
@@ -25,7 +25,18 @@ init_mnesia() ->
             {record_name, session},
             {attributes, record_info(fields, session)}
         ]
-    ).
+    ),
+    ok = mria:create_table(
+        ?ITERATOR_REF_TAB,
+        [
+            {rlog_shard, ?DS_SHARD},
+            {type, ordered_set},
+            {storage, storage()},
+            {record_name, iterator_ref},
+            {attributes, record_info(fields, iterator_ref)}
+        ]
+    ),
+    ok.
 
 storage() ->
     case mria:rocksdb_backend_available() of
diff --git a/apps/emqx_durable_storage/src/emqx_ds_int.hrl b/apps/emqx_durable_storage/src/emqx_ds_int.hrl
index 96688ede6..47493bd0b 100644
--- a/apps/emqx_durable_storage/src/emqx_ds_int.hrl
+++ b/apps/emqx_durable_storage/src/emqx_ds_int.hrl
@@ -17,11 +17,20 @@
 -define(EMQX_DS_HRL, true).
 
 -define(SESSION_TAB, emqx_ds_session).
+-define(ITERATOR_REF_TAB, emqx_ds_iterator_ref).
 -define(DS_SHARD, emqx_ds_shard).
 
 -record(session, {
+    %% same as clientid
     id :: emqx_ds:session_id(),
-    iterators :: [{emqx_topic:words(), emqx_ds:iterator_id()}]
+    %% for future usage
+    props = #{} :: map()
+}).
+
+-record(iterator_ref, {
+    ref_id :: {emqx_ds:session_id(), emqx_topic:words()},
+    it_id :: emqx_ds:iterator_id(),
+    start_time :: emqx_ds:time()
 }).
 
 -endif.
diff --git a/apps/emqx_durable_storage/src/emqx_ds_replay.erl b/apps/emqx_durable_storage/src/emqx_ds_replay.erl
deleted file mode 100644
index a66cee7fd..000000000
--- a/apps/emqx_durable_storage/src/emqx_ds_replay.erl
+++ /dev/null
@@ -1,36 +0,0 @@
-%%--------------------------------------------------------------------
-%% Copyright (c) 2022-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
-%%--------------------------------------------------------------------
--module(emqx_ds_replay).
-
-%% API:
--export([]).
-
--export_type([replay_id/0, replay/0]).
-
-%%================================================================================
-%% Type declarations
-%%================================================================================
-
--type replay_id() :: binary().
-
--type replay() :: {
-    _TopicFilter :: emqx_ds:topic(),
-    _StartTime :: emqx_ds:time()
-}.
-
-%%================================================================================
-%% API funcions
-%%================================================================================
-
-%%================================================================================
-%% behaviour callbacks
-%%================================================================================
-
-%%================================================================================
-%% Internal exports
-%%================================================================================
-
-%%================================================================================
-%% Internal functions
-%%================================================================================
diff --git a/apps/emqx_durable_storage/src/emqx_ds_storage_layer.erl b/apps/emqx_durable_storage/src/emqx_ds_storage_layer.erl
index 017423b02..47c29e170 100644
--- a/apps/emqx_durable_storage/src/emqx_ds_storage_layer.erl
+++ b/apps/emqx_durable_storage/src/emqx_ds_storage_layer.erl
@@ -13,7 +13,15 @@
 
 -export([make_iterator/2, next/1]).
--export([preserve_iterator/2, restore_iterator/2, discard_iterator/2]).
+-export([
+    preserve_iterator/2,
+    restore_iterator/2,
+    discard_iterator/2,
+    ensure_iterator/3,
+    discard_iterator_prefix/2,
+    list_iterator_prefix/2,
+    foldl_iterator_prefix/4
+]).
 
 %% behaviour callbacks:
 -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2]).
@@ -160,10 +168,10 @@ next(It = #it{module = Mod, data = ItData}) ->
         end
     end.
 
--spec preserve_iterator(iterator(), emqx_ds:replay_id()) ->
+-spec preserve_iterator(iterator(), emqx_ds:iterator_id()) ->
     ok | {error, _TODO}.
-preserve_iterator(It = #it{}, ReplayID) ->
-    iterator_put_state(ReplayID, It).
+preserve_iterator(It = #it{}, IteratorID) ->
+    iterator_put_state(IteratorID, It).
 
 -spec restore_iterator(emqx_ds:shard(), emqx_ds:replay_id()) ->
     {ok, iterator()} | {error, _TODO}.
@@ -177,11 +185,50 @@ restore_iterator(Shard, ReplayID) ->
             Error
     end.
 
+-spec ensure_iterator(emqx_ds:shard(), emqx_ds:iterator_id(), emqx_ds:replay()) ->
+    {ok, iterator()} | {error, _TODO}.
+ensure_iterator(Shard, IteratorID, Replay = {_TopicFilter, _StartMS}) ->
+    case restore_iterator(Shard, IteratorID) of
+        {ok, It} ->
+            {ok, It};
+        {error, not_found} ->
+            {ok, It} = make_iterator(Shard, Replay),
+            ok = emqx_ds_storage_layer:preserve_iterator(It, IteratorID),
+            {ok, It};
+        Error ->
+            Error
+    end.
+
 -spec discard_iterator(emqx_ds:shard(), emqx_ds:replay_id()) ->
     ok | {error, _TODO}.
 discard_iterator(Shard, ReplayID) ->
     iterator_delete(Shard, ReplayID).
 
+-spec discard_iterator_prefix(emqx_ds:shard(), binary()) ->
+    ok | {error, _TODO}.
+discard_iterator_prefix(Shard, KeyPrefix) ->
+    case do_discard_iterator_prefix(Shard, KeyPrefix) of
+        {ok, _} -> ok;
+        Error -> Error
+    end.
+
+-spec list_iterator_prefix(
+    emqx_ds:shard(),
+    binary()
+) -> {ok, [emqx_ds:iterator_id()]} | {error, _TODO}.
+list_iterator_prefix(Shard, KeyPrefix) ->
+    do_list_iterator_prefix(Shard, KeyPrefix).
+
+-spec foldl_iterator_prefix(
+    emqx_ds:shard(),
+    binary(),
+    fun((_Key :: binary(), _Value :: binary(), Acc) -> Acc),
+    Acc
+) -> {ok, Acc} | {error, _TODO} when
+    Acc :: term().
+foldl_iterator_prefix(Shard, KeyPrefix, Fn, Acc) ->
+    do_foldl_iterator_prefix(Shard, KeyPrefix, Fn, Acc).
+
 %%================================================================================
 %% behaviour callbacks
 %%================================================================================
@@ -344,7 +391,11 @@ open_restore_iterator(#{module := Mod, data := Data}, It = #it{replay = Replay},
 
 %%
 
--define(KEY_REPLAY_STATE(ReplayID), <<(ReplayID)/binary, "rs">>).
+-define(KEY_REPLAY_STATE(IteratorId), <<(IteratorId)/binary, "rs">>).
+-define(KEY_REPLAY_STATE_PAT(KeyReplayState), begin
+    <<IteratorId:(byte_size(KeyReplayState) - 2)/binary, "rs">> = (KeyReplayState),
+    IteratorId
+end).
 
 -define(ITERATION_WRITE_OPTS, []).
 -define(ITERATION_READ_OPTS, []).
@@ -391,6 +442,44 @@ restore_iterator_state(
     It = #it{shard = Shard, gen = Gen, replay = {TopicFilter, StartTime}},
     open_restore_iterator(meta_get_gen(Shard, Gen), It, State).
 
+do_list_iterator_prefix(Shard, KeyPrefix) ->
+    Fn = fun(K0, _V, Acc) ->
+        K = ?KEY_REPLAY_STATE_PAT(K0),
+        [K | Acc]
+    end,
+    do_foldl_iterator_prefix(Shard, KeyPrefix, Fn, []).
+
+do_discard_iterator_prefix(Shard, KeyPrefix) ->
+    #db{handle = DBHandle, cf_iterator = CF} = meta_lookup(Shard, db),
+    Fn = fun(K, _V, _Acc) -> ok = rocksdb:delete(DBHandle, CF, K, ?ITERATION_WRITE_OPTS) end,
+    do_foldl_iterator_prefix(Shard, KeyPrefix, Fn, ok).
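+
+%% Illustration (not part of this changeset): iterator state lives under
+%% `?KEY_REPLAY_STATE(IteratorId)' keys, so folding with the empty prefix
+%% `<<>>' visits every preserved iterator in the shard; this is exactly how
+%% `list_iterator_prefix(Shard, <<>>)' enumerates all iterator ids.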
+
+do_foldl_iterator_prefix(Shard, KeyPrefix, Fn, Acc) ->
+    #db{handle = Handle, cf_iterator = CF} = meta_lookup(Shard, db),
+    case rocksdb:iterator(Handle, CF, ?ITERATION_READ_OPTS) of
+        {ok, It} ->
+            NextAction = {seek, KeyPrefix},
+            do_foldl_iterator_prefix(Handle, CF, It, KeyPrefix, NextAction, Fn, Acc);
+        Error ->
+            Error
+    end.
+
+do_foldl_iterator_prefix(DBHandle, CF, It, KeyPrefix, NextAction, Fn, Acc) ->
+    case rocksdb:iterator_move(It, NextAction) of
+        {ok, K = <<KeyPrefix:(byte_size(KeyPrefix))/binary, _/binary>>, V} ->
+            NewAcc = Fn(K, V, Acc),
+            do_foldl_iterator_prefix(DBHandle, CF, It, KeyPrefix, next, Fn, NewAcc);
+        {ok, _K, _V} ->
+            ok = rocksdb:iterator_close(It),
+            {ok, Acc};
+        {error, invalid_iterator} ->
+            ok = rocksdb:iterator_close(It),
+            {ok, Acc};
+        Error ->
+            ok = rocksdb:iterator_close(It),
+            Error
+    end.
+
 %% Functions for dealing with the metadata stored persistently in rocksdb
 
 -define(CURRENT_GEN, <<"current">>).
diff --git a/apps/emqx_durable_storage/src/emqx_durable_storage.app.src b/apps/emqx_durable_storage/src/emqx_durable_storage.app.src
index ecf9dd270..367ade691 100644
--- a/apps/emqx_durable_storage/src/emqx_durable_storage.app.src
+++ b/apps/emqx_durable_storage/src/emqx_durable_storage.app.src
@@ -2,7 +2,7 @@
 {application, emqx_durable_storage, [
     {description, "Message persistence and subscription replays for EMQX"},
     % strict semver, bump manually!
-    {vsn, "0.1.2"},
+    {vsn, "0.1.3"},
     {modules, []},
     {registered, []},
     {applications, [kernel, stdlib, rocksdb, gproc, mria]},
diff --git a/apps/emqx_durable_storage/test/props/emqx_ds_message_storage_bitmask_shim.erl b/apps/emqx_durable_storage/test/props/emqx_ds_message_storage_bitmask_shim.erl
index 59668ca01..e9daf2581 100644
--- a/apps/emqx_durable_storage/test/props/emqx_ds_message_storage_bitmask_shim.erl
+++ b/apps/emqx_durable_storage/test/props/emqx_ds_message_storage_bitmask_shim.erl
@@ -14,6 +14,8 @@
 
 -opaque t() :: ets:tid().
 
+-export_type([t/0]).
+
 -spec open() -> t().
 open() ->
     ets:new(?MODULE, [ordered_set, {keypos, 1}]).
diff --git a/apps/emqx_exhook/test/props/prop_exhook_hooks.erl b/apps/emqx_exhook/test/props/prop_exhook_hooks.erl
index 34d7a4342..2c9b5bb06 100644
--- a/apps/emqx_exhook/test/props/prop_exhook_hooks.erl
+++ b/apps/emqx_exhook/test/props/prop_exhook_hooks.erl
@@ -642,7 +642,7 @@ unsub_properties() ->
     #{}.
 
 shutdown_reason() ->
-    oneof([utf8(), {shutdown, emqx_proper_types:limited_atom()}]).
+    oneof([utf8(), {shutdown, emqx_proper_types:limited_latin_atom()}]).
 
 authresult() ->
     ?LET(
diff --git a/apps/emqx_ft/src/emqx_ft.app.src b/apps/emqx_ft/src/emqx_ft.app.src
index 8518958e0..f75b1805b 100644
--- a/apps/emqx_ft/src/emqx_ft.app.src
+++ b/apps/emqx_ft/src/emqx_ft.app.src
@@ -1,6 +1,6 @@
 {application, emqx_ft, [
     {description, "EMQX file transfer over MQTT"},
-    {vsn, "0.1.5"},
+    {vsn, "0.1.6"},
     {registered, []},
     {mod, {emqx_ft_app, []}},
     {applications, [
diff --git a/apps/emqx_ft/src/emqx_ft.erl b/apps/emqx_ft/src/emqx_ft.erl
index 41046907b..41020e76f 100644
--- a/apps/emqx_ft/src/emqx_ft.erl
+++ b/apps/emqx_ft/src/emqx_ft.erl
@@ -18,7 +18,9 @@
 
 -include_lib("emqx/include/emqx.hrl").
 -include_lib("emqx/include/emqx_mqtt.hrl").
+-include_lib("emqx/include/emqx_channel.hrl").
 -include_lib("emqx/include/emqx_hooks.hrl").
+
 -include_lib("snabbkaffe/include/trace.hrl").
 
 -export([
@@ -28,7 +30,10 @@
 
 -export([
     on_message_publish/1,
-    on_message_puback/4
+    on_message_puback/4,
+    on_client_timeout/3,
+    on_process_down/4,
+    on_channel_unregistered/1
 ]).
 
 -export([
@@ -36,8 +41,6 @@
     encode_filemeta/1
 ]).
 
--export([on_complete/4]).
- -export_type([ clientid/0, transfer/0, @@ -85,17 +88,29 @@ checksum => checksum() }. +-define(FT_EVENT(EVENT), {?MODULE, EVENT}). + %%-------------------------------------------------------------------- %% API for app %%-------------------------------------------------------------------- hook() -> ok = emqx_hooks:put('message.publish', {?MODULE, on_message_publish, []}, ?HP_LOWEST), - ok = emqx_hooks:put('message.puback', {?MODULE, on_message_puback, []}, ?HP_LOWEST). + ok = emqx_hooks:put('message.puback', {?MODULE, on_message_puback, []}, ?HP_LOWEST), + ok = emqx_hooks:put('client.timeout', {?MODULE, on_client_timeout, []}, ?HP_LOWEST), + ok = emqx_hooks:put( + 'client.monitored_process_down', {?MODULE, on_process_down, []}, ?HP_LOWEST + ), + ok = emqx_hooks:put( + 'cm.channel.unregistered', {?MODULE, on_channel_unregistered, []}, ?HP_LOWEST + ). unhook() -> ok = emqx_hooks:del('message.publish', {?MODULE, on_message_publish}), - ok = emqx_hooks:del('message.puback', {?MODULE, on_message_puback}). + ok = emqx_hooks:del('message.puback', {?MODULE, on_message_puback}), + ok = emqx_hooks:del('client.timeout', {?MODULE, on_client_timeout}), + ok = emqx_hooks:del('client.monitored_process_down', {?MODULE, on_process_down}), + ok = emqx_hooks:del('cm.channel.unregistered', {?MODULE, on_channel_unregistered}). %%-------------------------------------------------------------------- %% API @@ -145,6 +160,25 @@ on_message_puback(PacketId, #message{topic = Topic} = Msg, _PubRes, _RC) -> ignore end. +on_channel_unregistered(ChannelPid) -> + ok = emqx_ft_async_reply:deregister_all(ChannelPid). + +on_client_timeout(_TRef, ?FT_EVENT({MRef, PacketId}), Acc) -> + _ = erlang:demonitor(MRef, [flush]), + _ = emqx_ft_async_reply:take_by_mref(MRef), + {stop, [?REPLY_OUTGOING(?PUBACK_PACKET(PacketId, ?RC_UNSPECIFIED_ERROR)) | Acc]}; +on_client_timeout(_TRef, _Event, Acc) -> + {ok, Acc}. + +on_process_down(MRef, _Pid, Reason, Acc) -> + case emqx_ft_async_reply:take_by_mref(MRef) of + {ok, PacketId, TRef} -> + _ = emqx_utils:cancel_timer(TRef), + {stop, [?REPLY_OUTGOING(?PUBACK_PACKET(PacketId, reason_to_rc(Reason))) | Acc]}; + not_found -> + {ok, Acc} + end. + %%-------------------------------------------------------------------- %% Handlers for transfer messages %%-------------------------------------------------------------------- @@ -208,24 +242,13 @@ on_init(PacketId, Msg, Transfer, Meta) -> transfer => Transfer, filemeta => Meta }), - PacketKey = {self(), PacketId}, - Callback = fun(Result) -> - ?MODULE:on_complete("store_filemeta", PacketKey, Transfer, Result) - end, - with_responder(PacketKey, Callback, emqx_ft_conf:init_timeout(), fun() -> - case store_filemeta(Transfer, Meta) of - % Stored, ack through the responder right away - ok -> - emqx_ft_responder:ack(PacketKey, ok); - % Storage operation started, packet will be acked by the responder - % {async, Pid} -> - % ok = emqx_ft_responder:kickoff(PacketKey, Pid), - % ok; - %% Storage operation failed, ack through the responder - {error, _} = Error -> - emqx_ft_responder:ack(PacketKey, Error) - end - end). + %% Currently synchronous. + %% If we want to make it async, we need to use `emqx_ft_async_reply`, + %% like in `on_fin`. + case store_filemeta(Transfer, Meta) of + ok -> ?RC_SUCCESS; + {error, _} -> ?RC_UNSPECIFIED_ERROR + end. 
on_abort(_Msg, _FileId) -> %% TODO @@ -240,21 +263,13 @@ on_segment(PacketId, Msg, Transfer, Offset, Checksum) -> checksum => Checksum }), Segment = {Offset, Msg#message.payload}, - PacketKey = {self(), PacketId}, - Callback = fun(Result) -> - ?MODULE:on_complete("store_segment", PacketKey, Transfer, Result) - end, - with_responder(PacketKey, Callback, emqx_ft_conf:store_segment_timeout(), fun() -> - case store_segment(Transfer, Segment) of - ok -> - emqx_ft_responder:ack(PacketKey, ok); - % {async, Pid} -> - % ok = emqx_ft_responder:kickoff(PacketKey, Pid), - % ok; - {error, _} = Error -> - emqx_ft_responder:ack(PacketKey, Error) - end - end). + %% Currently synchronous. + %% If we want to make it async, we need to use `emqx_ft_async_reply`, + %% like in `on_fin`. + case store_segment(Transfer, Segment) of + ok -> ?RC_SUCCESS; + {error, _} -> ?RC_UNSPECIFIED_ERROR + end. on_fin(PacketId, Msg, Transfer, FinalSize, FinalChecksum) -> ?tp(info, "file_transfer_fin", #{ @@ -265,37 +280,30 @@ on_fin(PacketId, Msg, Transfer, FinalSize, FinalChecksum) -> checksum => FinalChecksum }), %% TODO: handle checksum? Do we need it? - FinPacketKey = {self(), PacketId}, - Callback = fun(Result) -> - ?MODULE:on_complete("assemble", FinPacketKey, Transfer, Result) - end, - with_responder(FinPacketKey, Callback, emqx_ft_conf:assemble_timeout(), fun() -> - case assemble(Transfer, FinalSize, FinalChecksum) of - %% Assembling completed, ack through the responder right away - ok -> - emqx_ft_responder:ack(FinPacketKey, ok); - %% Assembling started, packet will be acked by the responder - {async, Pid} -> - ok = emqx_ft_responder:kickoff(FinPacketKey, Pid), - ok; - %% Assembling failed, ack through the responder - {error, _} = Error -> - emqx_ft_responder:ack(FinPacketKey, Error) - end - end). + emqx_ft_async_reply:with_new_packet( + PacketId, + fun() -> + case assemble(Transfer, FinalSize, FinalChecksum) of + ok -> + ?RC_SUCCESS; + %% Assembling started, packet will be acked by monitor or timeout + {async, Pid} -> + ok = register_async_reply(Pid, PacketId), + ok = emqx_ft_storage:kickoff(Pid), + undefined; + {error, _} -> + ?RC_UNSPECIFIED_ERROR + end + end, + undefined + ). -with_responder(Key, Callback, Timeout, CriticalSection) -> - case emqx_ft_responder:start(Key, Callback, Timeout) of - %% We have new packet - {ok, _} -> - CriticalSection(); - %% Packet already received. - %% Since we are still handling the previous one, - %% we probably have retransmit here - {error, {already_started, _}} -> - ok - end, - undefined. +register_async_reply(Pid, PacketId) -> + MRef = erlang:monitor(process, Pid), + TRef = erlang:start_timer( + emqx_ft_conf:assemble_timeout(), self(), ?FT_EVENT({MRef, PacketId}) + ), + ok = emqx_ft_async_reply:register(PacketId, MRef, TRef). store_filemeta(Transfer, Segment) -> try @@ -335,28 +343,6 @@ transfer(Msg, FileId) -> ClientId = Msg#message.from, {clientid_to_binary(ClientId), FileId}. 
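%% Editor's note (illustrative, not part of this changeset): the async PUBACK
%% path above works as follows. register_async_reply/2 pairs a process monitor
%% with a timer, roughly:
%%
%%   MRef = erlang:monitor(process, AssemblerPid),
%%   TRef = erlang:start_timer(Timeout, self(), ?FT_EVENT({MRef, PacketId})),
%%   ok = emqx_ft_async_reply:register(PacketId, MRef, TRef).
%%
%% Exactly one of the two then wins: on_process_down/4 (via the
%% 'client.monitored_process_down' hook) or on_client_timeout/3 (via
%% 'client.timeout'). Whichever fires first takes the entry with
%% emqx_ft_async_reply:take_by_mref/1 and emits the PUBACK; the loser finds
%% nothing and leaves the hook accumulator unchanged.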
-on_complete(Op, {ChanPid, PacketId}, Transfer, Result) -> - ?tp(debug, "on_complete", #{ - operation => Op, - packet_id => PacketId, - transfer => Transfer - }), - case Result of - {Mode, ok} when Mode == ack orelse Mode == down -> - erlang:send(ChanPid, {puback, PacketId, [], ?RC_SUCCESS}); - {Mode, {error, _} = Reason} when Mode == ack orelse Mode == down -> - ?tp(error, Op ++ "_failed", #{ - transfer => Transfer, - reason => Reason - }), - erlang:send(ChanPid, {puback, PacketId, [], ?RC_UNSPECIFIED_ERROR}); - timeout -> - ?tp(error, Op ++ "_timed_out", #{ - transfer => Transfer - }), - erlang:send(ChanPid, {puback, PacketId, [], ?RC_UNSPECIFIED_ERROR}) - end. - validate(Validations, Fun) -> case do_validate(Validations, []) of {ok, Parsed} -> @@ -429,3 +415,20 @@ clientid_to_binary(A) when is_atom(A) -> atom_to_binary(A); clientid_to_binary(B) when is_binary(B) -> B. + +reason_to_rc(Reason) -> + case map_down_reason(Reason) of + ok -> ?RC_SUCCESS; + {error, _} -> ?RC_UNSPECIFIED_ERROR + end. + +map_down_reason(normal) -> + ok; +map_down_reason(shutdown) -> + ok; +map_down_reason({shutdown, Result}) -> + Result; +map_down_reason(noproc) -> + {error, noproc}; +map_down_reason(Error) -> + {error, {internal_error, Error}}. diff --git a/apps/emqx_ft/src/emqx_ft_async_reply.erl b/apps/emqx_ft/src/emqx_ft_async_reply.erl new file mode 100644 index 000000000..501f91629 --- /dev/null +++ b/apps/emqx_ft/src/emqx_ft_async_reply.erl @@ -0,0 +1,114 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- + +-module(emqx_ft_async_reply). + +-include_lib("emqx/include/logger.hrl"). +-include_lib("emqx/include/types.hrl"). +-include_lib("stdlib/include/ms_transform.hrl"). + +-export([ + create_tables/0, + info/0 +]). + +-export([ + register/3, + take_by_mref/1, + with_new_packet/3, + deregister_all/1 +]). + +-type channel_pid() :: pid(). +-type mon_ref() :: reference(). +-type timer_ref() :: reference(). +-type packet_id() :: emqx_types:packet_id(). + +%% packets waiting for async workers + +-define(MON_TAB, emqx_ft_async_mons). +-define(MON_KEY(MRef), ?MON_KEY(self(), MRef)). +-define(MON_KEY(ChannelPid, MRef), {ChannelPid, MRef}). + +%% async worker monitors by packet ids + +-define(PACKET_TAB, emqx_ft_async_packets). +-define(PACKET_KEY(PacketId), ?PACKET_KEY(self(), PacketId)). +-define(PACKET_KEY(ChannelPid, PacketId), {ChannelPid, PacketId}). + +%%-------------------------------------------------------------------- +%% API +%% ------------------------------------------------------------------- + +-spec create_tables() -> ok. +create_tables() -> + EtsOptions = [ + named_table, + public, + ordered_set, + {read_concurrency, true}, + {write_concurrency, true} + ], + ok = emqx_utils_ets:new(?MON_TAB, EtsOptions), + ok = emqx_utils_ets:new(?PACKET_TAB, EtsOptions), + ok. 
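%% Editor's note (illustrative, not part of this changeset): with the macros
%% above, the two tables end up holding tuples of the following shapes:
%%
%%   ?PACKET_TAB: {{ChannelPid, PacketId}, MRef}
%%   ?MON_TAB:    {{ChannelPid, MRef}, PacketId, TRef}
%%
%% Prefixing both keys with the channel pid is what lets deregister_all/1
%% below drop all of a channel's entries with a single ets:select_delete/2
%% match spec per table, without touching entries that belong to other
%% channels.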
+ +-spec register(packet_id(), mon_ref(), timer_ref()) -> ok. +register(PacketId, MRef, TRef) -> + _ = ets:insert(?PACKET_TAB, {?PACKET_KEY(PacketId), MRef}), + _ = ets:insert(?MON_TAB, {?MON_KEY(MRef), PacketId, TRef}), + ok. + +-spec with_new_packet(packet_id(), fun(() -> any()), any()) -> any(). +with_new_packet(PacketId, Fun, Default) -> + case ets:member(?PACKET_TAB, ?PACKET_KEY(PacketId)) of + true -> Default; + false -> Fun() + end. + +-spec take_by_mref(mon_ref()) -> {ok, packet_id(), timer_ref()} | not_found. +take_by_mref(MRef) -> + case ets:take(?MON_TAB, ?MON_KEY(MRef)) of + [{_, PacketId, TRef}] -> + _ = ets:delete(?PACKET_TAB, ?PACKET_KEY(PacketId)), + {ok, PacketId, TRef}; + [] -> + not_found + end. + +-spec deregister_all(channel_pid()) -> ok. +deregister_all(ChannelPid) -> + ok = deregister_packets(ChannelPid), + ok = deregister_mons(ChannelPid), + ok. + +-spec info() -> {non_neg_integer(), non_neg_integer()}. +info() -> + {ets:info(?MON_TAB, size), ets:info(?PACKET_TAB, size)}. + +%%-------------------------------------------------------------------- +%% Internal +%%------------------------------------------------------------------- + +deregister_packets(ChannelPid) when is_pid(ChannelPid) -> + MS = [{{?PACKET_KEY(ChannelPid, '_'), '_'}, [], [true]}], + _ = ets:select_delete(?PACKET_TAB, MS), + ok. + +deregister_mons(ChannelPid) -> + MS = [{{?MON_KEY(ChannelPid, '_'), '_', '_'}, [], [true]}], + _ = ets:select_delete(?MON_TAB, MS), + ok. diff --git a/apps/emqx_ft/src/emqx_ft_responder.erl b/apps/emqx_ft/src/emqx_ft_responder.erl deleted file mode 100644 index c2c62e1c2..000000000 --- a/apps/emqx_ft/src/emqx_ft_responder.erl +++ /dev/null @@ -1,116 +0,0 @@ -%%-------------------------------------------------------------------- -%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved. -%% -%% Licensed under the Apache License, Version 2.0 (the "License"); -%% you may not use this file except in compliance with the License. -%% You may obtain a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, software -%% distributed under the License is distributed on an "AS IS" BASIS, -%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%% See the License for the specific language governing permissions and -%% limitations under the License. -%%-------------------------------------------------------------------- - --module(emqx_ft_responder). - --behaviour(gen_server). - --include_lib("emqx/include/logger.hrl"). --include_lib("emqx/include/types.hrl"). - --include_lib("snabbkaffe/include/snabbkaffe.hrl"). - -%% API --export([start/3]). --export([kickoff/2]). --export([ack/2]). - -%% Supervisor API --export([start_link/3]). - --export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2]). - --define(REF(Key), {via, gproc, {n, l, {?MODULE, Key}}}). - --type key() :: term(). --type respfun() :: fun(({ack, _Result} | {down, _Result} | timeout) -> _SideEffect). - -%%-------------------------------------------------------------------- -%% API -%% ------------------------------------------------------------------- - --spec start(key(), respfun(), timeout()) -> startlink_ret(). -start(Key, RespFun, Timeout) -> - emqx_ft_responder_sup:start_child(Key, RespFun, Timeout). - --spec kickoff(key(), pid()) -> ok. -kickoff(Key, Pid) -> - gen_server:call(?REF(Key), {kickoff, Pid}). - --spec ack(key(), _Result) -> _Return. 
-ack(Key, Result) -> - % TODO: it's possible to avoid term copy - gen_server:call(?REF(Key), {ack, Result}, infinity). - --spec start_link(key(), timeout(), respfun()) -> startlink_ret(). -start_link(Key, RespFun, Timeout) -> - gen_server:start_link(?REF(Key), ?MODULE, {Key, RespFun, Timeout}, []). - -%%-------------------------------------------------------------------- -%% gen_server callbacks -%% ------------------------------------------------------------------- - -init({Key, RespFun, Timeout}) -> - _ = erlang:process_flag(trap_exit, true), - _TRef = erlang:send_after(Timeout, self(), timeout), - {ok, {Key, RespFun}}. - -handle_call({kickoff, Pid}, _From, St) -> - % TODO: more state? - _MRef = erlang:monitor(process, Pid), - _ = Pid ! kickoff, - {reply, ok, St}; -handle_call({ack, Result}, _From, {Key, RespFun}) -> - Ret = apply(RespFun, [{ack, Result}]), - ?tp(debug, ft_responder_ack, #{key => Key, result => Result, return => Ret}), - {stop, {shutdown, Ret}, Ret, undefined}; -handle_call(Msg, _From, State) -> - ?SLOG(warning, #{msg => "unknown_call", call_msg => Msg}), - {reply, {error, unknown_call}, State}. - -handle_cast(Msg, State) -> - ?SLOG(warning, #{msg => "unknown_cast", cast_msg => Msg}), - {noreply, State}. - -handle_info(timeout, {Key, RespFun}) -> - Ret = apply(RespFun, [timeout]), - ?tp(debug, ft_responder_timeout, #{key => Key, return => Ret}), - {stop, {shutdown, Ret}, undefined}; -handle_info({'DOWN', _MRef, process, _Pid, Reason}, {Key, RespFun}) -> - Ret = apply(RespFun, [{down, map_down_reason(Reason)}]), - ?tp(debug, ft_responder_procdown, #{key => Key, reason => Reason, return => Ret}), - {stop, {shutdown, Ret}, undefined}; -handle_info(Msg, State) -> - ?SLOG(warning, #{msg => "unknown_message", info_msg => Msg}), - {noreply, State}. - -terminate(_Reason, undefined) -> - ok; -terminate(Reason, {Key, RespFun}) -> - Ret = apply(RespFun, [timeout]), - ?tp(debug, ft_responder_shutdown, #{key => Key, reason => Reason, return => Ret}), - ok. - -map_down_reason(normal) -> - ok; -map_down_reason(shutdown) -> - ok; -map_down_reason({shutdown, Result}) -> - Result; -map_down_reason(noproc) -> - {error, noproc}; -map_down_reason(Error) -> - {error, {internal_error, Error}}. diff --git a/apps/emqx_ft/src/emqx_ft_responder_sup.erl b/apps/emqx_ft/src/emqx_ft_responder_sup.erl deleted file mode 100644 index fb3932425..000000000 --- a/apps/emqx_ft/src/emqx_ft_responder_sup.erl +++ /dev/null @@ -1,48 +0,0 @@ -%%-------------------------------------------------------------------- -%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved. -%% -%% Licensed under the Apache License, Version 2.0 (the "License"); -%% you may not use this file except in compliance with the License. -%% You may obtain a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, software -%% distributed under the License is distributed on an "AS IS" BASIS, -%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%% See the License for the specific language governing permissions and -%% limitations under the License. -%%-------------------------------------------------------------------- - --module(emqx_ft_responder_sup). - --export([start_link/0]). --export([start_child/3]). - --behaviour(supervisor). --export([init/1]). - --define(SUPERVISOR, ?MODULE). - -%% - --spec start_link() -> {ok, pid()}. -start_link() -> - supervisor:start_link({local, ?SUPERVISOR}, ?MODULE, []). 
- -start_child(Key, RespFun, Timeout) -> - supervisor:start_child(?SUPERVISOR, [Key, RespFun, Timeout]). - --spec init(_) -> {ok, {supervisor:sup_flags(), [supervisor:child_spec()]}}. -init(_) -> - Flags = #{ - strategy => simple_one_for_one, - intensity => 100, - period => 100 - }, - ChildSpec = #{ - id => responder, - start => {emqx_ft_responder, start_link, []}, - restart => temporary - }, - {ok, {Flags, [ChildSpec]}}. diff --git a/apps/emqx_ft/src/emqx_ft_storage.erl b/apps/emqx_ft/src/emqx_ft_storage.erl index 04fac3b38..506cf9789 100644 --- a/apps/emqx_ft/src/emqx_ft_storage.erl +++ b/apps/emqx_ft/src/emqx_ft_storage.erl @@ -23,6 +23,7 @@ store_filemeta/2, store_segment/2, assemble/3, + kickoff/1, files/0, files/1, @@ -121,6 +122,13 @@ store_segment(Transfer, Segment) -> assemble(Transfer, Size, FinOpts) -> dispatch(assemble, [Transfer, Size, FinOpts]). +-spec kickoff(pid()) -> ok. +kickoff(Pid) -> + _ = erlang:send(Pid, kickoff), + ok. + +%% + -spec files() -> {ok, page(file_info(), _)} | {error, term()}. files() -> diff --git a/apps/emqx_ft/src/emqx_ft_sup.erl b/apps/emqx_ft/src/emqx_ft_sup.erl index 0308668ab..6d3936cf6 100644 --- a/apps/emqx_ft/src/emqx_ft_sup.erl +++ b/apps/emqx_ft/src/emqx_ft_sup.erl @@ -28,6 +28,8 @@ start_link() -> supervisor:start_link({local, ?SERVER}, ?MODULE, []). init([]) -> + ok = emqx_ft_async_reply:create_tables(), + SupFlags = #{ strategy => one_for_one, intensity => 100, @@ -52,14 +54,5 @@ init([]) -> modules => [emqx_ft_storage_fs_reader_sup] }, - Responder = #{ - id => emqx_ft_responder_sup, - start => {emqx_ft_responder_sup, start_link, []}, - restart => permanent, - shutdown => infinity, - type => worker, - modules => [emqx_ft_responder_sup] - }, - - ChildSpecs = [Responder, AssemblerSup, FileReaderSup], + ChildSpecs = [AssemblerSup, FileReaderSup], {ok, {SupFlags, ChildSpecs}}. diff --git a/apps/emqx_ft/test/emqx_ft_SUITE.erl b/apps/emqx_ft/test/emqx_ft_SUITE.erl index 290cda333..7da9ccf69 100644 --- a/apps/emqx_ft/test/emqx_ft_SUITE.erl +++ b/apps/emqx_ft/test/emqx_ft_SUITE.erl @@ -37,7 +37,7 @@ all() -> groups() -> [ - {single_node, [parallel], [ + {single_node, [], [ t_assemble_crash, t_corrupted_segment_retry, t_invalid_checksum, @@ -76,7 +76,7 @@ init_per_suite(Config) -> [ {emqx_ft, #{config => emqx_ft_test_helpers:config(Storage)}} ], - #{work_dir => ?config(priv_dir, Config)} + #{work_dir => emqx_cth_suite:work_dir(Config)} ), [{suite_apps, Apps} | Config]. diff --git a/apps/emqx_ft/test/emqx_ft_async_reply_SUITE.erl b/apps/emqx_ft/test/emqx_ft_async_reply_SUITE.erl new file mode 100644 index 000000000..78a9b371c --- /dev/null +++ b/apps/emqx_ft/test/emqx_ft_async_reply_SUITE.erl @@ -0,0 +1,247 @@ +%%-------------------------------------------------------------------- +%% Copyright (c) 2020-2023 EMQ Technologies Co., Ltd. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%-------------------------------------------------------------------- + +-module(emqx_ft_async_reply_SUITE). + +-compile(export_all). 
+-compile(nowarn_export_all). + +-include_lib("common_test/include/ct.hrl"). +-include_lib("stdlib/include/assert.hrl"). +-include_lib("emqx/include/asserts.hrl"). + +all() -> emqx_common_test_helpers:all(?MODULE). + +init_per_suite(Config) -> + Apps = emqx_cth_suite:start( + [ + {emqx, #{override_env => [{boot_modules, [broker, listeners]}]}}, + {emqx_ft, "file_transfer { enable = true, assemble_timeout = 1s }"} + ], + #{work_dir => ?config(priv_dir, Config)} + ), + [{suite_apps, Apps} | Config]. + +end_per_suite(Config) -> + ok = emqx_cth_suite:stop(?config(suite_apps, Config)), + ok. + +init_per_testcase(_Case, Config) -> + ok = snabbkaffe:start_trace(), + Config. + +end_per_testcase(_Case, _Config) -> + ok = snabbkaffe:stop(), + ok. + +%%-------------------------------------------------------------------- +%% Tests +%%-------------------------------------------------------------------- + +t_register(_Config) -> + PacketId = 1, + MRef = make_ref(), + TRef = make_ref(), + ok = emqx_ft_async_reply:register(PacketId, MRef, TRef), + + ?assertEqual( + undefined, + emqx_ft_async_reply:with_new_packet(PacketId, fun() -> ok end, undefined) + ), + + ?assertEqual( + ok, + emqx_ft_async_reply:with_new_packet(2, fun() -> ok end, undefined) + ), + + ?assertEqual( + {ok, PacketId, TRef}, + emqx_ft_async_reply:take_by_mref(MRef) + ). + +t_process_independence(_Config) -> + PacketId = 1, + MRef = make_ref(), + TRef = make_ref(), + ok = emqx_ft_async_reply:register(PacketId, MRef, TRef), + + Self = self(), + + spawn_link(fun() -> + Self ! emqx_ft_async_reply:take_by_mref(MRef) + end), + + Res1 = + receive + Msg1 -> Msg1 + end, + + ?assertEqual( + not_found, + Res1 + ), + + spawn_link(fun() -> + Self ! emqx_ft_async_reply:with_new_packet(PacketId, fun() -> ok end, undefined) + end), + + Res2 = + receive + Msg2 -> Msg2 + end, + + ?assertEqual( + ok, + Res2 + ). + +t_take(_Config) -> + PacketId = 1, + MRef = make_ref(), + TRef = make_ref(), + ok = emqx_ft_async_reply:register(PacketId, MRef, TRef), + + ?assertEqual( + {ok, PacketId, TRef}, + emqx_ft_async_reply:take_by_mref(MRef) + ), + + ?assertEqual( + not_found, + emqx_ft_async_reply:take_by_mref(MRef) + ), + + ?assertEqual( + ok, + emqx_ft_async_reply:with_new_packet(2, fun() -> ok end, undefined) + ). + +t_cleanup(_Config) -> + PacketId = 1, + MRef0 = make_ref(), + TRef0 = make_ref(), + MRef1 = make_ref(), + TRef1 = make_ref(), + ok = emqx_ft_async_reply:register(PacketId, MRef0, TRef0), + + Self = self(), + + Pid = spawn_link(fun() -> + ok = emqx_ft_async_reply:register(PacketId, MRef1, TRef1), + receive + kickoff -> + ?assertEqual( + undefined, + emqx_ft_async_reply:with_new_packet(PacketId, fun() -> ok end, undefined) + ), + + ?assertEqual( + {ok, PacketId, TRef1}, + emqx_ft_async_reply:take_by_mref(MRef1) + ), + + Self ! done + end + end), + + ?assertEqual( + undefined, + emqx_ft_async_reply:with_new_packet(PacketId, fun() -> ok end, undefined) + ), + + ok = emqx_ft_async_reply:deregister_all(Self), + + ?assertEqual( + ok, + emqx_ft_async_reply:with_new_packet(PacketId, fun() -> ok end, undefined) + ), + + Pid ! kickoff, + + receive + done -> ok + end. 
+
+t_reply_by_timeout(_Config) ->
+    process_flag(trap_exit, true),
+    ClientId = atom_to_binary(?FUNCTION_NAME),
+    C = emqx_ft_test_helpers:start_client(ClientId, node()),
+
+    SleepForever = fun() ->
+        Ref = make_ref(),
+        receive
+            Ref -> ok
+        end
+    end,
+
+    ok = meck:new(emqx_ft_storage, [passthrough]),
+    meck:expect(emqx_ft_storage, assemble, fun(_, _, _) -> {async, spawn_link(SleepForever)} end),
+
+    FinTopic = <<"$file/fakeid/fin/999999">>,
+
+    ?assertMatch(
+        {ok, #{reason_code_name := unspecified_error}},
+        emqtt:publish(C, FinTopic, <<>>, 1)
+    ),
+
+    meck:unload(emqx_ft_storage),
+    emqtt:stop(C).
+
+t_cleanup_by_cm(_Config) ->
+    process_flag(trap_exit, true),
+    ClientId = atom_to_binary(?FUNCTION_NAME),
+    C = emqx_ft_test_helpers:start_client(ClientId, node()),
+
+    ok = meck:new(emqx_ft_storage, [passthrough]),
+    meck:expect(emqx_ft_storage, kickoff, fun(_) -> meck:exception(error, oops) end),
+
+    FinTopic = <<"$file/fakeid/fin/999999">>,
+
+    [ClientPid] = emqx_cm:lookup_channels(ClientId),
+
+    ?assertWaitEvent(
+        begin
+            emqtt:publish(C, FinTopic, <<>>, 1),
+            exit(ClientPid, kill)
+        end,
+        #{?snk_kind := emqx_cm_clean_down, client_id := ClientId},
+        1000
+    ),
+
+    ?assertEqual(
+        {0, 0},
+        emqx_ft_async_reply:info()
+    ),
+
+    meck:unload(emqx_ft_storage).
+
+t_unrelated_events(_Config) ->
+    process_flag(trap_exit, true),
+    ClientId = atom_to_binary(?FUNCTION_NAME),
+    C = emqx_ft_test_helpers:start_client(ClientId, node()),
+    [ClientPid] = emqx_cm:lookup_channels(ClientId),
+
+    erlang:monitor(process, ClientPid),
+
+    ClientPid ! {'DOWN', make_ref(), process, self(), normal},
+    ClientPid ! {timeout, make_ref(), unknown_timer_event},
+
+    ?assertNotReceive(
+        {'DOWN', _Ref, process, ClientPid, _Reason},
+        500
+    ),
+
+    emqtt:stop(C).
diff --git a/apps/emqx_ft/test/emqx_ft_conf_SUITE.erl b/apps/emqx_ft/test/emqx_ft_conf_SUITE.erl
index 3fdfdf65a..0acdea213 100644
--- a/apps/emqx_ft/test/emqx_ft_conf_SUITE.erl
+++ b/apps/emqx_ft/test/emqx_ft_conf_SUITE.erl
@@ -32,13 +32,12 @@ end_per_suite(_Config) ->
     ok.
 
 init_per_testcase(Case, Config) ->
-    WorkDir = filename:join(?config(priv_dir, Config), Case),
     Apps = emqx_cth_suite:start(
         [
             {emqx_conf, #{}},
             {emqx_ft, #{config => "file_transfer {}"}}
         ],
-        #{work_dir => WorkDir}
+        #{work_dir => emqx_cth_suite:work_dir(Case, Config)}
     ),
     [{suite_apps, Apps} | Config].
 
diff --git a/apps/emqx_ft/test/emqx_ft_responder_SUITE.erl b/apps/emqx_ft/test/emqx_ft_responder_SUITE.erl
deleted file mode 100644
index 751861206..000000000
--- a/apps/emqx_ft/test/emqx_ft_responder_SUITE.erl
+++ /dev/null
@@ -1,84 +0,0 @@
-%%--------------------------------------------------------------------
-%% Copyright (c) 2020-2023 EMQ Technologies Co., Ltd. All Rights Reserved.
-%%
-%% Licensed under the Apache License, Version 2.0 (the "License");
-%% you may not use this file except in compliance with the License.
-%% You may obtain a copy of the License at
-%%
-%%     http://www.apache.org/licenses/LICENSE-2.0
-%%
-%% Unless required by applicable law or agreed to in writing, software
-%% distributed under the License is distributed on an "AS IS" BASIS,
-%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-%% See the License for the specific language governing permissions and
-%% limitations under the License.
-%%--------------------------------------------------------------------
-
--module(emqx_ft_responder_SUITE).
-
--compile(export_all).
--compile(nowarn_export_all).
-
--include_lib("stdlib/include/assert.hrl").
- -all() -> emqx_common_test_helpers:all(?MODULE). - -init_per_suite(Config) -> - ok = emqx_common_test_helpers:start_apps([emqx_ft], emqx_ft_test_helpers:env_handler(Config)), - Config. - -end_per_suite(_Config) -> - ok = emqx_common_test_helpers:stop_apps([emqx_ft]), - ok. - -init_per_testcase(_Case, Config) -> - Config. - -end_per_testcase(_Case, _Config) -> - ok. - -t_start_ack(_Config) -> - Key = <<"test">>, - DefaultAction = fun({ack, Ref}) -> Ref end, - ?assertMatch( - {ok, _Pid}, - emqx_ft_responder:start(Key, DefaultAction, 1000) - ), - ?assertMatch( - {error, {already_started, _Pid}}, - emqx_ft_responder:start(Key, DefaultAction, 1000) - ), - Ref = make_ref(), - ?assertEqual( - Ref, - emqx_ft_responder:ack(Key, Ref) - ), - ?assertExit( - {noproc, _}, - emqx_ft_responder:ack(Key, Ref) - ). - -t_timeout(_Config) -> - Key = <<"test">>, - Self = self(), - DefaultAction = fun(timeout) -> Self ! {timeout, Key} end, - {ok, _Pid} = emqx_ft_responder:start(Key, DefaultAction, 20), - receive - {timeout, Key} -> - ok - after 100 -> - ct:fail("emqx_ft_responder not called") - end, - ?assertExit( - {noproc, _}, - emqx_ft_responder:ack(Key, oops) - ). - -t_unknown_msgs(_Config) -> - {ok, Pid} = emqx_ft_responder:start(make_ref(), fun(_) -> ok end, 100), - Pid ! {unknown_msg, <<"test">>}, - ok = gen_server:cast(Pid, {unknown_msg, <<"test">>}), - ?assertEqual( - {error, unknown_call}, - gen_server:call(Pid, {unknown_call, <<"test">>}) - ). diff --git a/apps/emqx_ft/test/emqx_ft_storage_fs_SUITE.erl b/apps/emqx_ft/test/emqx_ft_storage_fs_SUITE.erl index a57cdf621..52d372e63 100644 --- a/apps/emqx_ft/test/emqx_ft_storage_fs_SUITE.erl +++ b/apps/emqx_ft/test/emqx_ft_storage_fs_SUITE.erl @@ -36,12 +36,11 @@ groups() -> init_per_suite(Config) -> Storage = emqx_ft_test_helpers:local_storage(Config), - WorkDir = ?config(priv_dir, Config), Apps = emqx_cth_suite:start( [ {emqx_ft, #{config => emqx_ft_test_helpers:config(Storage)}} ], - #{work_dir => WorkDir} + #{work_dir => emqx_cth_suite:work_dir(Config)} ), [{suite_apps, Apps} | Config]. diff --git a/apps/emqx_ft/test/emqx_ft_storage_fs_gc_SUITE.erl b/apps/emqx_ft/test/emqx_ft_storage_fs_gc_SUITE.erl index b14fc7edd..311ad7fbd 100644 --- a/apps/emqx_ft/test/emqx_ft_storage_fs_gc_SUITE.erl +++ b/apps/emqx_ft/test/emqx_ft_storage_fs_gc_SUITE.erl @@ -28,7 +28,7 @@ all() -> emqx_common_test_helpers:all(?MODULE). init_per_suite(Config) -> - Apps = emqx_cth_suite:start([emqx], #{work_dir => ?config(priv_dir, Config)}), + Apps = emqx_cth_suite:start([emqx], #{work_dir => emqx_cth_suite:work_dir(Config)}), [{suite_apps, Apps} | Config]. 
end_per_suite(Config) -> diff --git a/apps/emqx_gateway/src/emqx_gateway.app.src b/apps/emqx_gateway/src/emqx_gateway.app.src index 582269ce6..47899ceeb 100644 --- a/apps/emqx_gateway/src/emqx_gateway.app.src +++ b/apps/emqx_gateway/src/emqx_gateway.app.src @@ -1,7 +1,7 @@ %% -*- mode: erlang -*- {application, emqx_gateway, [ {description, "The Gateway management application"}, - {vsn, "0.1.23"}, + {vsn, "0.1.24"}, {registered, []}, {mod, {emqx_gateway_app, []}}, {applications, [kernel, stdlib, emqx, emqx_authn, emqx_ctl]}, diff --git a/apps/emqx_ldap/src/emqx_ldap.app.src b/apps/emqx_ldap/src/emqx_ldap.app.src index 7a252dd33..152a7b6a9 100644 --- a/apps/emqx_ldap/src/emqx_ldap.app.src +++ b/apps/emqx_ldap/src/emqx_ldap.app.src @@ -1,6 +1,6 @@ {application, emqx_ldap, [ {description, "EMQX LDAP Connector"}, - {vsn, "0.1.1"}, + {vsn, "0.1.2"}, {registered, []}, {applications, [ kernel, diff --git a/apps/emqx_machine/src/emqx_machine.app.src b/apps/emqx_machine/src/emqx_machine.app.src index dd1915cfb..813d41e5b 100644 --- a/apps/emqx_machine/src/emqx_machine.app.src +++ b/apps/emqx_machine/src/emqx_machine.app.src @@ -3,7 +3,7 @@ {id, "emqx_machine"}, {description, "The EMQX Machine"}, % strict semver, bump manually! - {vsn, "0.2.12"}, + {vsn, "0.2.13"}, {modules, []}, {registered, []}, {applications, [kernel, stdlib, emqx_ctl]}, diff --git a/apps/emqx_machine/src/emqx_machine_boot.erl b/apps/emqx_machine/src/emqx_machine_boot.erl index cb05280f5..a27c2156d 100644 --- a/apps/emqx_machine/src/emqx_machine_boot.erl +++ b/apps/emqx_machine/src/emqx_machine_boot.erl @@ -30,12 +30,19 @@ -export([sorted_reboot_apps/1, reboot_apps/0]). -endif. -%% these apps are always (re)started by emqx_machine +%% These apps are always (re)started by emqx_machine: -define(BASIC_REBOOT_APPS, [gproc, esockd, ranch, cowboy, emqx]). -%% If any of these applications crash, the entire EMQX node shuts down +%% If any of these applications crash, the entire EMQX node shuts down: -define(BASIC_PERMANENT_APPS, [mria, ekka, esockd, emqx]). +%% These apps should NOT be (re)started automatically: +-define(EXCLUDED_APPS, [system_monitor, observer_cli, jq]). + +%% These apps are optional, they may or may not be present in the +%% release, depending on the build flags: +-define(OPTIONAL_APPS, [bcrypt, observer]). + post_boot() -> ok = ensure_apps_started(), ok = print_vsn(), @@ -150,9 +157,9 @@ basic_reboot_apps() -> ?BASIC_REBOOT_APPS ++ (BusinessApps -- excluded_apps()). excluded_apps() -> - OptionalApps = [bcrypt, jq, observer], - [system_monitor, observer_cli] ++ - [App || App <- OptionalApps, not is_app(App)]. + %% Optional apps _should_ be (re)started automatically, but only + %% when they are found in the release: + ?EXCLUDED_APPS ++ [App || App <- ?OPTIONAL_APPS, not is_app(App)]. is_app(Name) -> case application:load(Name) of diff --git a/apps/emqx_management/src/emqx_management.app.src b/apps/emqx_management/src/emqx_management.app.src index e1056ab0c..f16156c35 100644 --- a/apps/emqx_management/src/emqx_management.app.src +++ b/apps/emqx_management/src/emqx_management.app.src @@ -2,7 +2,7 @@ {application, emqx_management, [ {description, "EMQX Management API and CLI"}, % strict semver, bump manually! 
- {vsn, "5.0.28"}, + {vsn, "5.0.29"}, {modules, []}, {registered, [emqx_management_sup]}, {applications, [kernel, stdlib, emqx_plugins, minirest, emqx, emqx_ctl, emqx_bridge_http]}, diff --git a/apps/emqx_management/src/emqx_mgmt_api_clients.erl b/apps/emqx_management/src/emqx_mgmt_api_clients.erl index 18ac65ae6..d9b6d9bd5 100644 --- a/apps/emqx_management/src/emqx_mgmt_api_clients.erl +++ b/apps/emqx_management/src/emqx_mgmt_api_clients.erl @@ -927,7 +927,7 @@ format_channel_info(WhichNode, {_, ClientInfo0, ClientStats}) -> retry_interval, upgrade_qos, zone, - %% sessionID, defined in emqx_session.erl + %% session_id, defined in emqx_session.erl id ], TimesKeys = [created_at, connected_at, disconnected_at], diff --git a/apps/emqx_management/src/emqx_mgmt_api_publish.erl b/apps/emqx_management/src/emqx_mgmt_api_publish.erl index ba486ab89..f0834af96 100644 --- a/apps/emqx_management/src/emqx_mgmt_api_publish.erl +++ b/apps/emqx_management/src/emqx_mgmt_api_publish.erl @@ -57,7 +57,7 @@ schema("/publish") -> responses => #{ ?ALL_IS_WELL => hoconsc:mk(hoconsc:ref(?MODULE, publish_ok)), ?PARTIALLY_OK => hoconsc:mk(hoconsc:ref(?MODULE, publish_error)), - ?BAD_REQUEST => bad_request_schema(), + ?BAD_REQUEST => hoconsc:mk(hoconsc:ref(?MODULE, bad_request)), ?DISPATCH_ERROR => hoconsc:mk(hoconsc:ref(?MODULE, publish_error)) } } @@ -196,11 +196,13 @@ fields(bad_request) -> [ {code, hoconsc:mk(string(), #{ - desc => <<"BAD_REQUEST">> + desc => <<"BAD_REQUEST">>, + example => ?RC_TOPIC_NAME_INVALID })}, {message, hoconsc:mk(binary(), #{ - desc => ?DESC(error_message) + desc => ?DESC(error_message), + example => to_binary(emqx_reason_codes:name(?RC_TOPIC_NAME_INVALID)) })} ]. diff --git a/apps/emqx_management/src/emqx_mgmt_api_trace.erl b/apps/emqx_management/src/emqx_mgmt_api_trace.erl index 17adf7460..bcc21a97b 100644 --- a/apps/emqx_management/src/emqx_mgmt_api_trace.erl +++ b/apps/emqx_management/src/emqx_mgmt_api_trace.erl @@ -22,6 +22,7 @@ -include_lib("emqx/include/logger.hrl"). -include_lib("snabbkaffe/include/snabbkaffe.hrl"). -include_lib("hocon/include/hoconsc.hrl"). +-include_lib("emqx_utils/include/emqx_utils_api.hrl"). -export([ api_spec/0, @@ -51,8 +52,7 @@ -define(MAX_SINT32, 2147483647). -define(TO_BIN(_B_), iolist_to_binary(_B_)). --define(NOT_FOUND(N), {404, #{code => 'NOT_FOUND', message => ?TO_BIN([N, " NOT FOUND"])}}). --define(SERVICE_UNAVAILABLE(C, M), {503, #{code => C, message => ?TO_BIN(M)}}). +-define(NOT_FOUND_WITH_MSG(N), ?NOT_FOUND(?TO_BIN([N, " NOT FOUND"]))). -define(TAGS, [<<"Trace">>]). namespace() -> "trace". @@ -476,13 +476,13 @@ format_trace(Trace0) -> delete_trace(delete, #{bindings := #{name := Name}}) -> case emqx_trace:delete(Name) of ok -> {204}; - {error, not_found} -> ?NOT_FOUND(Name) + {error, not_found} -> ?NOT_FOUND_WITH_MSG(Name) end. update_trace(put, #{bindings := #{name := Name}}) -> case emqx_trace:update(Name, false) of ok -> {200, #{enable => false, name => Name}}; - {error, not_found} -> ?NOT_FOUND(Name) + {error, not_found} -> ?NOT_FOUND_WITH_MSG(Name) end. %% if HTTP request headers include accept-encoding: gzip and file size > 300 bytes. @@ -493,64 +493,85 @@ download_trace_log(get, #{bindings := #{name := Name}, query_string := Query}) - case parse_node(Query, undefined) of {ok, Node} -> TraceFiles = collect_trace_file(Node, TraceLog), - %% We generate a session ID so that we name files - %% with unique names. Then we won't cause - %% overwrites for concurrent requests. 
- SessionId = emqx_utils:gen_id(), - ZipDir = filename:join([emqx_trace:zip_dir(), SessionId]), - ok = file:make_dir(ZipDir), - %% Write files to ZipDir and create an in-memory zip file - Zips = group_trace_file(ZipDir, TraceLog, TraceFiles), - ZipName = binary_to_list(Name) ++ ".zip", - Binary = - try - {ok, {ZipName, Bin}} = zip:zip(ZipName, Zips, [memory, {cwd, ZipDir}]), - Bin - after - %% emqx_trace:delete_files_after_send(ZipFileName, Zips), - %% TODO use file replace file_binary.(delete file after send is not ready now). - ok = file:del_dir_r(ZipDir) - end, - ?tp(trace_api_download_trace_log, #{ - files => Zips, - name => Name, - session_id => SessionId, - zip_dir => ZipDir, - zip_name => ZipName - }), - Headers = #{ - <<"content-type">> => <<"application/x-zip">>, - <<"content-disposition">> => iolist_to_binary( - "attachment; filename=" ++ ZipName - ) - }, - {200, Headers, {file_binary, ZipName, Binary}}; + maybe_download_trace_log(Name, TraceLog, TraceFiles); {error, not_found} -> - ?NOT_FOUND(<<"Node">>) + ?NOT_FOUND_WITH_MSG(<<"Node">>) end; {error, not_found} -> - ?NOT_FOUND(Name) + ?NOT_FOUND_WITH_MSG(Name) end. +maybe_download_trace_log(Name, TraceLog, TraceFiles) -> + case group_trace_files(TraceLog, TraceFiles) of + #{nonempty := Files} -> + do_download_trace_log(Name, TraceLog, Files); + #{error := Reasons} -> + ?INTERNAL_ERROR(Reasons); + #{empty := _} -> + ?NOT_FOUND(<<"Trace is empty">>) + end. + +do_download_trace_log(Name, TraceLog, TraceFiles) -> + %% We generate a session ID so that we name files + %% with unique names. Then we won't cause + %% overwrites for concurrent requests. + SessionId = emqx_utils:gen_id(), + ZipDir = filename:join([emqx_trace:zip_dir(), SessionId]), + ok = file:make_dir(ZipDir), + %% Write files to ZipDir and create an in-memory zip file + Zips = group_trace_file(ZipDir, TraceLog, TraceFiles), + ZipName = binary_to_list(Name) ++ ".zip", + Binary = + try + {ok, {ZipName, Bin}} = zip:zip(ZipName, Zips, [memory, {cwd, ZipDir}]), + Bin + after + %% emqx_trace:delete_files_after_send(ZipFileName, Zips), + %% TODO use file replace file_binary.(delete file after send is not ready now). + ok = file:del_dir_r(ZipDir) + end, + ?tp(trace_api_download_trace_log, #{ + files => Zips, + name => Name, + session_id => SessionId, + zip_dir => ZipDir, + zip_name => ZipName + }), + Headers = #{ + <<"content-type">> => <<"application/x-zip">>, + <<"content-disposition">> => iolist_to_binary( + "attachment; filename=" ++ ZipName + ) + }, + {200, Headers, {file_binary, ZipName, Binary}}. + +group_trace_files(TraceLog, TraceFiles) -> + maps:groups_from_list( + fun + ({ok, _Node, <<>>}) -> + empty; + ({ok, _Node, _Bin}) -> + nonempty; + ({error, Node, Reason}) -> + ?SLOG(error, #{ + msg => "download_trace_log_error", + node => Node, + log => TraceLog, + reason => Reason + }), + error + end, + TraceFiles + ). 
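%% Editor's note (illustrative, not part of this changeset):
%% maps:groups_from_list/2 (OTP 25+) buckets each list element under the key
%% returned by the classifier function, e.g.:
%%
%%   maps:groups_from_list(fun(X) -> X rem 2 end, [1, 2, 3]).
%%   %% => #{0 => [2], 1 => [1, 3]}
%%
%% so the `#{nonempty := Files}` clause in maybe_download_trace_log/3 matches
%% as soon as at least one node returned a non-empty trace log, and it is
%% tried before the `error` and `empty` clauses.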
+ group_trace_file(ZipDir, TraceLog, TraceFiles) -> lists:foldl( - fun(Res, Acc) -> - case Res of - {ok, Node, Bin} -> - FileName = Node ++ "-" ++ TraceLog, - ZipName = filename:join([ZipDir, FileName]), - case file:write_file(ZipName, Bin) of - ok -> [FileName | Acc]; - _ -> Acc - end; - {error, Node, Reason} -> - ?SLOG(error, #{ - msg => "download_trace_log_error", - node => Node, - log => TraceLog, - reason => Reason - }), - Acc + fun({ok, Node, Bin}, Acc) -> + FileName = Node ++ "-" ++ TraceLog, + ZipName = filename:join([ZipDir, FileName]), + case file:write_file(ZipName, Bin) of + ok -> [FileName | Acc]; + _ -> Acc end end, [], @@ -578,7 +599,7 @@ log_file_detail(get, #{bindings := #{name := Name}}) -> TraceFiles = collect_trace_file_detail(TraceLog), {200, group_trace_file_detail(TraceFiles)}; {error, not_found} -> - ?NOT_FOUND(Name) + ?NOT_FOUND_WITH_MSG(Name) end. group_trace_file_detail(TraceLogDetail) -> @@ -609,7 +630,7 @@ stream_log_file(get, #{bindings := #{name := Name}, query_string := Query}) -> Meta = #{<<"position">> => Position, <<"bytes">> => Bytes}, {200, #{meta => Meta, items => <<"">>}}; {error, not_found} -> - ?NOT_FOUND(Name); + ?NOT_FOUND_WITH_MSG(Name); {error, enomem} -> ?SLOG(warning, #{ code => not_enough_mem, @@ -617,12 +638,12 @@ stream_log_file(get, #{bindings := #{name := Name}, query_string := Query}) -> bytes => Bytes, name => Name }), - ?SERVICE_UNAVAILABLE('SERVICE_UNAVAILABLE', <<"Requested chunk size too big">>); + ?SERVICE_UNAVAILABLE(<<"Requested chunk size too big">>); {badrpc, nodedown} -> - ?NOT_FOUND(<<"Node">>) + ?NOT_FOUND_WITH_MSG(<<"Node">>) end; {error, not_found} -> - ?NOT_FOUND(<<"Node">>) + ?NOT_FOUND_WITH_MSG(<<"Node">>) end. -spec get_trace_size() -> #{{node(), file:name_all()} => non_neg_integer()}. diff --git a/apps/emqx_management/src/emqx_mgmt_cli.erl b/apps/emqx_management/src/emqx_mgmt_cli.erl index 121e5b1a8..72498fe12 100644 --- a/apps/emqx_management/src/emqx_mgmt_cli.erl +++ b/apps/emqx_management/src/emqx_mgmt_cli.erl @@ -693,7 +693,7 @@ authz(["cache-clean", "all"]) -> with_log(fun emqx_mgmt:clean_authz_cache_all/0, Msg); authz(["cache-clean", ClientId]) -> Msg = io_lib:format("Drain ~ts authz cache", [ClientId]), - with_log(fun() -> emqx_mgmt:clean_authz_cache(ClientId) end, Msg); + with_log(fun() -> emqx_mgmt:clean_authz_cache(iolist_to_binary(ClientId)) end, Msg); authz(_) -> emqx_ctl:usage( [ @@ -907,12 +907,14 @@ for_node(Fun, Node) -> end. with_log(Fun, Msg) -> - case Fun() of + Res = Fun(), + case Res of ok -> emqx_ctl:print("~s OK~n", [Msg]); {error, Reason} -> emqx_ctl:print("~s FAILED~n~p~n", [Msg, Reason]) - end. + end, + Res. cluster_info() -> RunningNodes = safe_call_mria(running_nodes, [], []), diff --git a/apps/emqx_management/test/emqx_mgmt_api_trace_SUITE.erl b/apps/emqx_management/test/emqx_mgmt_api_trace_SUITE.erl index 8f9a4a5ca..f4725b453 100644 --- a/apps/emqx_management/test/emqx_mgmt_api_trace_SUITE.erl +++ b/apps/emqx_management/test/emqx_mgmt_api_trace_SUITE.erl @@ -369,6 +369,28 @@ t_trace_files_are_deleted_after_download(_Config) -> ), ok. 
+t_download_empty_trace(_Config) -> + ClientId = <<"client-test-empty-trace-download">>, + Now = erlang:system_time(second), + Name = <<"test_client_id_empty_trace">>, + load(), + create_trace(Name, ClientId, Now), + ok = emqx_trace_handler_SUITE:filesync(Name, clientid), + ?check_trace( + begin + ?wait_async_action( + ?assertMatch( + {ok, _}, request_api(put, api_path(<<"trace/", Name/binary, "/stop">>), #{}) + ), + #{?snk_kind := update_trace_done} + ) + end, + [] + ), + {error, {{_, 404, _}, _Headers, Body}} = + request_api(get, api_path(<<"trace/", Name/binary, "/download">>), [], #{return_all => true}), + ?assertMatch(#{<<"message">> := <<"Trace is empty">>}, emqx_utils_json:decode(Body)). + to_rfc3339(Second) -> list_to_binary(calendar:system_time_to_rfc3339(Second)). @@ -376,8 +398,11 @@ request_api(Method, Url) -> request_api(Method, Url, []). request_api(Method, Url, Body) -> - Opts = #{httpc_req_opts => [{body_format, binary}]}, - emqx_mgmt_api_test_util:request_api(Method, Url, [], [], Body, Opts). + request_api(Method, Url, Body, #{}). + +request_api(Method, Url, Body, Opts) -> + Opts1 = Opts#{httpc_req_opts => [{body_format, binary}]}, + emqx_mgmt_api_test_util:request_api(Method, Url, [], [], Body, Opts1). api_path(Path) -> emqx_mgmt_api_test_util:api_path([Path]). diff --git a/apps/emqx_management/test/emqx_mgmt_cli_SUITE.erl b/apps/emqx_management/test/emqx_mgmt_cli_SUITE.erl index 33292e54e..405890729 100644 --- a/apps/emqx_management/test/emqx_mgmt_cli_SUITE.erl +++ b/apps/emqx_management/test/emqx_mgmt_cli_SUITE.erl @@ -25,6 +25,7 @@ all() -> init_per_suite(Config) -> emqx_mgmt_api_test_util:init_suite([emqx_conf, emqx_management]), + ok = emqx_mgmt_cli:load(), Config. end_per_suite(_) -> @@ -183,9 +184,25 @@ t_listeners(_Config) -> t_authz(_Config) -> %% authz cache-clean all # Clears authorization cache on all nodes - emqx_ctl:run_command(["authz", "cache-clean", "all"]), - %% authz cache-clean node # Clears authorization cache on given node + ?assertMatch(ok, emqx_ctl:run_command(["authz", "cache-clean", "all"])), + ClientId = "authz_clean_test", + ClientIdBin = list_to_binary(ClientId), %% authz cache-clean # Clears authorization cache for given client + ?assertMatch({error, not_found}, emqx_ctl:run_command(["authz", "cache-clean", ClientId])), + {ok, C} = emqtt:start_link([{clean_start, true}, {clientid, ClientId}]), + {ok, _} = emqtt:connect(C), + {ok, _, _} = emqtt:subscribe(C, <<"topic/1">>, 1), + [Pid] = emqx_cm:lookup_channels(ClientIdBin), + ?assertMatch([_], gen_server:call(Pid, list_authz_cache)), + + ?assertMatch(ok, emqx_ctl:run_command(["authz", "cache-clean", ClientId])), + ?assertMatch([], gen_server:call(Pid, list_authz_cache)), + %% authz cache-clean node # Clears authorization cache on given node + {ok, _, _} = emqtt:subscribe(C, <<"topic/2">>, 1), + ?assertMatch([_], gen_server:call(Pid, list_authz_cache)), + ?assertMatch(ok, emqx_ctl:run_command(["authz", "cache-clean", "node", atom_to_list(node())])), + ?assertMatch([], gen_server:call(Pid, list_authz_cache)), + ok = emqtt:disconnect(C), ok. t_olp(_Config) -> diff --git a/apps/emqx_management/test/emqx_mgmt_data_backup_SUITE.erl b/apps/emqx_management/test/emqx_mgmt_data_backup_SUITE.erl index d7db39657..7cb2c9cf1 100644 --- a/apps/emqx_management/test/emqx_mgmt_data_backup_SUITE.erl +++ b/apps/emqx_management/test/emqx_mgmt_data_backup_SUITE.erl @@ -33,7 +33,7 @@ init_per_suite(Config) -> end_per_suite(_Config) -> ok. 
-init_per_testcase(TC = t_import_on_cluster, Config0) -> +init_per_testcase(TC = t_import_on_cluster, Config) -> %% Don't import listeners to avoid port conflicts %% when the same conf will be imported to another cluster meck:new(emqx_mgmt_listeners_conf, [passthrough]), @@ -50,17 +50,15 @@ init_per_testcase(TC = t_import_on_cluster, Config0) -> 1, {ok, #{changed => [], root_key => gateway}} ), - Config = [{tc_name, TC} | Config0], - [{cluster, cluster(Config)} | setup(Config)]; -init_per_testcase(TC = t_verify_imported_mnesia_tab_on_cluster, Config0) -> - Config = [{tc_name, TC} | Config0], - [{cluster, cluster(Config)} | setup(Config)]; + [{cluster, cluster(TC, Config)} | setup(TC, Config)]; +init_per_testcase(TC = t_verify_imported_mnesia_tab_on_cluster, Config) -> + [{cluster, cluster(TC, Config)} | setup(TC, Config)]; init_per_testcase(t_mnesia_bad_tab_schema, Config) -> meck:new(emqx_mgmt_data_backup, [passthrough]), meck:expect(TC = emqx_mgmt_data_backup, mnesia_tabs_to_backup, 0, [data_backup_test]), - setup([{tc_name, TC} | Config]); + setup(TC, Config); init_per_testcase(TC, Config) -> - setup([{tc_name, TC} | Config]). + setup(TC, Config). end_per_testcase(t_import_on_cluster, Config) -> emqx_cth_cluster:stop(?config(cluster, Config)), @@ -304,7 +302,7 @@ t_verify_imported_mnesia_tab_on_cluster(Config) -> {ok, Cwd} = file:get_cwd(), AbsFilePath = filename:join(Cwd, FileName), - [CoreNode1, CoreNode2, ReplicantNode] = NodesList = ?config(cluster, Config), + [CoreNode1, CoreNode2, ReplicantNode] = ?config(cluster, Config), [ {ok, _} = rpc:call(CoreNode1, emqx_dashboard_admin, add_user, [U, U, U]) @@ -328,10 +326,7 @@ t_verify_imported_mnesia_tab_on_cluster(Config) -> %% Give some extra time to replicant to import data... timer:sleep(3000), - ?assertEqual(AllUsers, lists:sort(rpc:call(ReplicantNode, mnesia, dirty_all_keys, [Tab]))), - - [rpc:call(N, ekka, leave, []) || N <- lists:reverse(NodesList)], - [emqx_common_test_helpers:stop_slave(N) || N <- NodesList]. + ?assertEqual(AllUsers, lists:sort(rpc:call(ReplicantNode, mnesia, dirty_all_keys, [Tab]))). t_mnesia_bad_tab_schema(_Config) -> OldAttributes = [id, name, description], @@ -386,8 +381,8 @@ t_read_files(_Config) -> %% Internal test helpers %%------------------------------------------------------------------------------ -setup(Config) -> - WorkDir = filename:join(work_dir(Config), local), +setup(TC, Config) -> + WorkDir = filename:join(emqx_cth_suite:work_dir(TC, Config), local), Started = emqx_cth_suite:start(apps_to_start(), #{work_dir => WorkDir}), [{suite_apps, Started} | Config]. @@ -408,20 +403,17 @@ recompose_version(MajorInt, MinorInt, Patch) -> [integer_to_list(MajorInt + 1), $., integer_to_list(MinorInt), $. | Patch] ). -cluster(Config) -> +cluster(TC, Config) -> Nodes = emqx_cth_cluster:start( [ {data_backup_core1, #{role => core, apps => apps_to_start()}}, {data_backup_core2, #{role => core, apps => apps_to_start()}}, {data_backup_replicant, #{role => replicant, apps => apps_to_start()}} ], - #{work_dir => work_dir(Config)} + #{work_dir => emqx_cth_suite:work_dir(TC, Config)} ), Nodes. -work_dir(Config) -> - filename:join(?config(priv_dir, Config), ?config(tc_name, Config)). 
- create_test_tab(Attributes) -> ok = mria:create_table(data_backup_test, [ {type, set}, @@ -440,8 +432,8 @@ create_test_tab(Attributes) -> apps_to_start() -> [ - {emqx, #{override_env => [{boot_modules, [broker, router]}]}}, - {emqx_conf, "dashboard.listeners.http.bind = 0"}, + {emqx, #{override_env => [{boot_modules, [broker]}]}}, + {emqx_conf, #{config => #{dashboard => #{listeners => #{http => #{bind => <<"0">>}}}}}}, emqx_psk, emqx_management, emqx_dashboard, diff --git a/apps/emqx_modules/src/emqx_modules.app.src b/apps/emqx_modules/src/emqx_modules.app.src index cd2f6c8b9..cceb51895 100644 --- a/apps/emqx_modules/src/emqx_modules.app.src +++ b/apps/emqx_modules/src/emqx_modules.app.src @@ -1,7 +1,7 @@ %% -*- mode: erlang -*- {application, emqx_modules, [ {description, "EMQX Modules"}, - {vsn, "5.0.20"}, + {vsn, "5.0.21"}, {modules, []}, {applications, [kernel, stdlib, emqx, emqx_ctl]}, {mod, {emqx_modules_app, []}}, diff --git a/apps/emqx_opentelemetry/src/emqx_opentelemetry.app.src b/apps/emqx_opentelemetry/src/emqx_opentelemetry.app.src index 7202b24c8..d459fc107 100644 --- a/apps/emqx_opentelemetry/src/emqx_opentelemetry.app.src +++ b/apps/emqx_opentelemetry/src/emqx_opentelemetry.app.src @@ -1,6 +1,6 @@ {application, emqx_opentelemetry, [ {description, "OpenTelemetry for EMQX Broker"}, - {vsn, "0.1.1"}, + {vsn, "0.1.2"}, {registered, []}, {mod, {emqx_otel_app, []}}, {applications, [kernel, stdlib, emqx]}, diff --git a/apps/emqx_oracle/src/emqx_oracle.app.src b/apps/emqx_oracle/src/emqx_oracle.app.src index e2d6d856f..c30e6be95 100644 --- a/apps/emqx_oracle/src/emqx_oracle.app.src +++ b/apps/emqx_oracle/src/emqx_oracle.app.src @@ -1,6 +1,6 @@ {application, emqx_oracle, [ {description, "EMQX Enterprise Oracle Database Connector"}, - {vsn, "0.1.5"}, + {vsn, "0.1.6"}, {registered, []}, {applications, [ kernel, diff --git a/apps/emqx_oracle/src/emqx_oracle_schema.erl b/apps/emqx_oracle/src/emqx_oracle_schema.erl index 3adde5af3..ba9904f19 100644 --- a/apps/emqx_oracle/src/emqx_oracle_schema.erl +++ b/apps/emqx_oracle/src/emqx_oracle_schema.erl @@ -21,7 +21,7 @@ roots() -> fields(config) -> Fields = [{server, server()}, {sid, fun sid/1}, {service_name, fun service_name/1}] ++ - emqx_connector_schema_lib:relational_db_fields() ++ + adjust_fields(emqx_connector_schema_lib:relational_db_fields()) ++ emqx_connector_schema_lib:prepare_statement_fields(), proplists:delete(database, Fields). @@ -38,3 +38,16 @@ service_name(type) -> binary(); service_name(desc) -> ?DESC(?REF_MODULE, "service_name"); service_name(required) -> false; service_name(_) -> undefined. + +adjust_fields(Fields) -> + lists:map( + fun + ({username, Sc}) -> + %% to please dialyzer... + Override = #{type => hocon_schema:field_schema(Sc, type), required => true}, + {username, hocon_schema:override(Sc, Override)}; + (Field) -> + Field + end, + Fields + ). diff --git a/apps/emqx_rule_engine/src/emqx_rule_engine.app.src b/apps/emqx_rule_engine/src/emqx_rule_engine.app.src index e6d00bcae..23e4a3f05 100644 --- a/apps/emqx_rule_engine/src/emqx_rule_engine.app.src +++ b/apps/emqx_rule_engine/src/emqx_rule_engine.app.src @@ -2,7 +2,7 @@ {application, emqx_rule_engine, [ {description, "EMQX Rule Engine"}, % strict semver, bump manually! 
- {vsn, "5.0.23"}, + {vsn, "5.0.24"}, {modules, []}, {registered, [emqx_rule_engine_sup, emqx_rule_engine]}, {applications, [kernel, stdlib, rulesql, getopt, emqx_ctl, uuid]}, diff --git a/apps/emqx_rule_engine/src/emqx_rule_funcs.erl b/apps/emqx_rule_engine/src/emqx_rule_funcs.erl index 038edea48..edaa5f25f 100644 --- a/apps/emqx_rule_engine/src/emqx_rule_funcs.erl +++ b/apps/emqx_rule_engine/src/emqx_rule_funcs.erl @@ -1174,15 +1174,30 @@ function_literal(Fun, Args) -> {invalid_func, {Fun, Args}}. mongo_date() -> - erlang:timestamp(). + maybe_isodate_format(erlang:timestamp()). mongo_date(MillisecondsTimestamp) -> - convert_timestamp(MillisecondsTimestamp). + maybe_isodate_format(convert_timestamp(MillisecondsTimestamp)). mongo_date(Timestamp, Unit) -> InsertedTimeUnit = time_unit(Unit), ScaledEpoch = erlang:convert_time_unit(Timestamp, InsertedTimeUnit, millisecond), - convert_timestamp(ScaledEpoch). + mongo_date(ScaledEpoch). + +maybe_isodate_format(ErlTimestamp) -> + case emqx_rule_sqltester:is_test_runtime_env() of + false -> + ErlTimestamp; + true -> + %% if this is called from sqltest, we need to convert it to the ISODate() format, + %% so that it can be correctly converted into a JSON string. + isodate_format(ErlTimestamp) + end. + +isodate_format({MegaSecs, Secs, MicroSecs}) -> + SystemTimeMs = (MegaSecs * 1000_000_000_000 + Secs * 1000_000 + MicroSecs) div 1000, + Ts3339Str = calendar:system_time_to_rfc3339(SystemTimeMs, [{unit, millisecond}, {offset, "Z"}]), + iolist_to_binary(["ISODate(", Ts3339Str, ")"]). convert_timestamp(MillisecondsTimestamp) -> MicroTimestamp = MillisecondsTimestamp * 1000, diff --git a/apps/emqx_rule_engine/src/emqx_rule_sqltester.erl b/apps/emqx_rule_engine/src/emqx_rule_sqltester.erl index f3b4e2790..867fffcc1 100644 --- a/apps/emqx_rule_engine/src/emqx_rule_sqltester.erl +++ b/apps/emqx_rule_engine/src/emqx_rule_sqltester.erl @@ -18,7 +18,9 @@ -export([ test/1, - get_selected_data/3 + get_selected_data/3, + %% Some SQL functions return different results in the test environment + is_test_runtime_env/0 ]). -spec test(#{sql := binary(), context := map()}) -> {ok, map() | list()} | {error, term()}. @@ -63,12 +65,14 @@ test_rule(Sql, Select, Context, EventTopics) -> created_at => erlang:system_time(millisecond) }, FullContext = fill_default_values(hd(EventTopics), emqx_rule_maps:atom_key_map(Context)), + set_is_test_runtime_env(), try emqx_rule_runtime:apply_rule(Rule, FullContext, #{}) of {ok, Data} -> {ok, flatten(Data)}; {error, Reason} -> {error, Reason} after + unset_is_test_runtime_env(), ok = emqx_rule_engine:clear_metrics_for_rule(RuleId) end. @@ -97,3 +101,20 @@ envs_examp(EventTopic) -> emqx_rule_events:columns_with_exam(EventName) ) ). + +is_test_runtime_env_atom() -> + 'emqx_rule_sqltester:is_test_runtime_env'. + +set_is_test_runtime_env() -> + erlang:put(is_test_runtime_env_atom(), true), + ok. + +unset_is_test_runtime_env() -> + erlang:erase(is_test_runtime_env_atom()), + ok. + +is_test_runtime_env() -> + case erlang:get(is_test_runtime_env_atom()) of + true -> true; + _ -> false + end. 
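%% Editor's note (illustrative, not part of this changeset): the flag lives in
%% the caller's process dictionary, so it is visible only to SQL functions
%% evaluated in the same process as test_rule/4, and it is always cleared in
%% the `after` clause. A sketch of the round trip:
%%
%%   set_is_test_runtime_env(),        %% is_test_runtime_env() =:= true
%%   try
%%       emqx_rule_funcs:mongo_date()  %% returns <<"ISODate(...)">> here
%%   after
%%       unset_is_test_runtime_env()   %% is_test_runtime_env() =:= false
%%   end.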
diff --git a/apps/emqx_rule_engine/test/emqx_rule_engine_api_rule_test_SUITE.erl b/apps/emqx_rule_engine/test/emqx_rule_engine_api_rule_test_SUITE.erl index 575d35238..0c772958e 100644 --- a/apps/emqx_rule_engine/test/emqx_rule_engine_api_rule_test_SUITE.erl +++ b/apps/emqx_rule_engine/test/emqx_rule_engine_api_rule_test_SUITE.erl @@ -214,6 +214,27 @@ t_ctx_delivery_dropped(_) -> Expected = check_result([from_clientid, from_username, reason, qos, topic], [], Context), do_test(SQL, Context, Expected). +t_mongo_date_function_should_return_string_in_test_env(_) -> + SQL = + <<"SELECT mongo_date() as mongo_date FROM \"t/1\"">>, + Context = + #{ + action => <<"publish">>, + clientid => <<"c_emqx">>, + event_type => client_check_authz_complete, + result => <<"allow">>, + topic => <<"t/1">>, + username => <<"u_emqx">> + }, + CheckFunction = fun(Result) -> + MongoDate = maps:get(mongo_date, Result), + %% Use regex to match the expected string + MatchResult = re:run(MongoDate, <<"ISODate\\([0-9]{4}-[0-9]{2}-[0-9]{2}T.*\\)">>), + ?assertMatch({match, _}, MatchResult), + ok + end, + do_test(SQL, Context, CheckFunction). + do_test(SQL, Context, Expected0) -> Res = emqx_rule_engine_api:'/rule_test'( post, diff --git a/apps/emqx_schema_registry/src/emqx_schema_registry.app.src b/apps/emqx_schema_registry/src/emqx_schema_registry.app.src index b79c9ec01..9145f5dc0 100644 --- a/apps/emqx_schema_registry/src/emqx_schema_registry.app.src +++ b/apps/emqx_schema_registry/src/emqx_schema_registry.app.src @@ -1,6 +1,6 @@ {application, emqx_schema_registry, [ {description, "EMQX Schema Registry"}, - {vsn, "0.1.5"}, + {vsn, "0.1.6"}, {registered, [emqx_schema_registry_sup]}, {mod, {emqx_schema_registry_app, []}}, {included_applications, [ diff --git a/apps/emqx_schema_registry/src/emqx_schema_registry.erl b/apps/emqx_schema_registry/src/emqx_schema_registry.erl index 3f09ac347..6face86d6 100644 --- a/apps/emqx_schema_registry/src/emqx_schema_registry.erl +++ b/apps/emqx_schema_registry/src/emqx_schema_registry.erl @@ -64,7 +64,7 @@ get_serde(SchemaName) -> get_schema(SchemaName) -> case emqx_config:get( - [?CONF_KEY_ROOT, schemas, binary_to_atom(SchemaName)], undefined + [?CONF_KEY_ROOT, schemas, schema_name_bin_to_atom(SchemaName)], undefined ) of undefined -> @@ -333,6 +333,20 @@ async_delete_serdes(Names) -> to_bin(A) when is_atom(A) -> atom_to_binary(A); to_bin(B) when is_binary(B) -> B. +schema_name_bin_to_atom(Bin) when size(Bin) > 255 -> + erlang:throw( + iolist_to_binary( + io_lib:format( + "Name is is too long." + " Please provide a shorter name (<= 255 bytes)." + " The name that is too long: \"~s\"", + [Bin] + ) + ) + ); +schema_name_bin_to_atom(Bin) -> + binary_to_atom(Bin, utf8). + -spec serde_to_map(serde()) -> serde_map(). serde_to_map(#serde{} = Serde) -> #{ diff --git a/apps/emqx_schema_registry/test/emqx_schema_registry_SUITE.erl b/apps/emqx_schema_registry/test/emqx_schema_registry_SUITE.erl index e2a696428..7aea09457 100644 --- a/apps/emqx_schema_registry/test/emqx_schema_registry_SUITE.erl +++ b/apps/emqx_schema_registry/test/emqx_schema_registry_SUITE.erl @@ -368,7 +368,7 @@ cluster(Config) -> {load_apps, [emqx_machine]}, {env_handler, fun (emqx) -> - application:set_env(emqx, boot_modules, [broker, router]), + application:set_env(emqx, boot_modules, [broker]), ok; (emqx_conf) -> ok; diff --git a/build b/build index 03d1ce673..874e4088c 100755 --- a/build +++ b/build @@ -369,9 +369,9 @@ docker_cleanup() { ## Build the default docker image based on debian 11. 
make_docker() { - local EMQX_BUILDER_VERSION="${EMQX_BUILDER_VERSION:-5.1-3}" + local EMQX_BUILDER_VERSION="${EMQX_BUILDER_VERSION:-5.1-4}" local EMQX_BUILDER_PLATFORM="${EMQX_BUILDER_PLATFORM:-debian11}" - local EMQX_BUILDER_OTP="${EMQX_BUILDER_OTP:-25.3.2-1}" + local EMQX_BUILDER_OTP="${EMQX_BUILDER_OTP:-25.3.2-2}" local EMQX_BUILDER_ELIXIR="${EMQX_BUILDER_ELIXIR:-1.14.5}" local EMQX_BUILDER=${EMQX_BUILDER:-ghcr.io/emqx/emqx-builder/${EMQX_BUILDER_VERSION}:${EMQX_BUILDER_ELIXIR}-${EMQX_BUILDER_OTP}-${EMQX_BUILDER_PLATFORM}} local EMQX_RUNNER="${EMQX_RUNNER:-${EMQX_DEFAULT_RUNNER}}" diff --git a/changes/ce/feat-11487.en.md b/changes/ce/feat-11487.en.md new file mode 100644 index 000000000..352a11c06 --- /dev/null +++ b/changes/ce/feat-11487.en.md @@ -0,0 +1,2 @@ +The bcrypt work factor is limited to the range 5-10, because higher values consume too many CPU resources. +The bcrypt library has been updated to allow parallel hash evaluation. diff --git a/changes/ce/fix-11493.en.md b/changes/ce/fix-11493.en.md new file mode 100644 index 000000000..93874933a --- /dev/null +++ b/changes/ce/fix-11493.en.md @@ -0,0 +1 @@ +Examples and documentation for the /api/v5/publish bad request response have been fixed. Previously, the documentation example said that the bad request response could return a list in the body, which was not actually the case. diff --git a/changes/ce/fix-11499.en.md b/changes/ce/fix-11499.en.md new file mode 100644 index 000000000..3ed4d1e15 --- /dev/null +++ b/changes/ce/fix-11499.en.md @@ -0,0 +1,3 @@ +Upgrade Erlang/OTP to 25.3.2-2 + +Erlang/OTP 25.3.2-2 excludes sensitive data from the mnesia_hook log message. diff --git a/changes/ce/fix-11506.en.md b/changes/ce/fix-11506.en.md new file mode 100644 index 000000000..7341134ac --- /dev/null +++ b/changes/ce/fix-11506.en.md @@ -0,0 +1,4 @@ +Don't download a trace log file if it is empty. + +After this fix, GET `/api/v5/trace/clientempty/download` returns 404 `{"code":"NOT_FOUND","message":"Trace is empty"}` +if no events matching the trace condition occurred. diff --git a/changes/ce/fix-11522.en.md b/changes/ce/fix-11522.en.md new file mode 100644 index 000000000..fdb56b4e2 --- /dev/null +++ b/changes/ce/fix-11522.en.md @@ -0,0 +1 @@ +Improved the error message emitted by the rule engine schema registry when a schema name exceeds the permissible length. diff --git a/changes/ce/fix-11531.en.md b/changes/ce/fix-11531.en.md new file mode 100644 index 000000000..00296433a --- /dev/null +++ b/changes/ce/fix-11531.en.md @@ -0,0 +1 @@ +Fixed an issue where the authorization cache cleaning CLI was not working properly for a specific client ID. diff --git a/changes/ce/fix-11564.en.md b/changes/ce/fix-11564.en.md new file mode 100644 index 000000000..cf6aa28cb --- /dev/null +++ b/changes/ce/fix-11564.en.md @@ -0,0 +1,2 @@ +Fix cluster partition autoheal functionality. +Implement autohealing for clusters that split into multiple partitions. diff --git a/changes/ce/perf-11532.en.md b/changes/ce/perf-11532.en.md new file mode 100644 index 000000000..a522f7828 --- /dev/null +++ b/changes/ce/perf-11532.en.md @@ -0,0 +1 @@ +Improve some error reasons when parsing invalid packets. diff --git a/changes/ee/fix-11394.en.md b/changes/ee/fix-11394.en.md new file mode 100644 index 000000000..ace678ecc --- /dev/null +++ b/changes/ee/fix-11394.en.md @@ -0,0 +1,2 @@ +Upgrade the Kafka producer client `wolff` from 1.7.6 to 1.7.7. +This fixes a potential race condition which could cause all Kafka producers to crash if some failed to initialize.
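As a side note to the feat-11487 entry above: the work factor is bcrypt's log-rounds parameter, so each increment roughly doubles the hashing cost, which is why the range is capped at 10. A hedged shell sketch of that growth, assuming the erlang-bcrypt API (`gen_salt/1` taking log rounds and `hashpw/2` returning `{ok, Hash}`) and a started `bcrypt` application:

```erlang
%% Assumption: erlang-bcrypt exposes gen_salt/1 and hashpw/2 as below.
{ok, _} = application:ensure_all_started(bcrypt),
lists:foreach(
    fun(WorkFactor) ->
        {ok, Salt} = bcrypt:gen_salt(WorkFactor),
        %% timer:tc/1 returns {Microseconds, Result}.
        {Micros, {ok, _Hash}} =
            timer:tc(fun() -> bcrypt:hashpw("correct horse", Salt) end),
        io:format("work factor ~p: ~p ms~n", [WorkFactor, Micros div 1000])
    end,
    lists:seq(5, 10)
).
```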
diff --git a/changes/ee/fix-11401.en.md b/changes/ee/fix-11401.en.md new file mode 100644 index 000000000..2bce7170a --- /dev/null +++ b/changes/ee/fix-11401.en.md @@ -0,0 +1 @@ +When running one of the rule engine SQL `mongo_date` functions in the EMQX dashboard test interface, the resulting date is now formatted as `ISODate(*)`, where `*` is the date in ISO format, rather than only the ISO date string. This is the format MongoDB uses to store dates. diff --git a/changes/ee/fix-11542.en.md b/changes/ee/fix-11542.en.md new file mode 100644 index 000000000..0100677bf --- /dev/null +++ b/changes/ee/fix-11542.en.md @@ -0,0 +1 @@ +Enhanced Google ProtoBuf schema registry support: Now, when assigning a float to an integer using the rule engine functions `schema_encode` or `sparkplug_encode`, a `gpb_type_error` will be raised instead of the previous `badarith` error. diff --git a/changes/ee/fix-11547.en.md b/changes/ee/fix-11547.en.md new file mode 100644 index 000000000..1a79b32ea --- /dev/null +++ b/changes/ee/fix-11547.en.md @@ -0,0 +1,7 @@ +Fix several emqx_bridge issues: + +- fix Cassandra bridge connect error occurring when the bridge is configured without username/password + (Cassandra doesn't require user credentials when it is configured with `authenticator: AllowAllAuthenticator`) +- fix SQL Server bridge connect error caused by an empty password +- make `username` a required field in Oracle bridge +- fix IoTDB bridge error caused by setting base URL without scheme (e.g. `:`) diff --git a/changes/v5.1.6.en.md b/changes/v5.1.6.en.md new file mode 100644 index 000000000..1e92f61d1 --- /dev/null +++ b/changes/v5.1.6.en.md @@ -0,0 +1,30 @@ +# v5.1.6 + +## Enhancements + +- [#11429](https://github.com/emqx/emqx/pull/11429) Added an option to configure detection of legacy protocol in MongoDB connectors and bridges. + +- [#11436](https://github.com/emqx/emqx/pull/11436) Added a new API endpoint `DELETE /banned` to clear all `banned` data. + +- [#11438](https://github.com/emqx/emqx/pull/11438) Changed the type of `mqtt.max_packet_size` from string to byteSize to better represent the valid numeric range. Strings will still be accepted for backwards compatibility. + +- [#11446](https://github.com/emqx/emqx/pull/11446) Refactored datetime-related modules and functions to simplify the code. + +- [#11396](https://github.com/emqx/emqx/pull/11396) Introduced a topic index for the rule engine runtime, which significantly improves the performance of EMQX with a non-trivial number of rules consuming messages matching different topic filters. + +## Bug Fixes + +- [#11424](https://github.com/emqx/emqx/pull/11424) Added a check for the maximum value of the timestamp in the API to ensure it is a valid Unix timestamp. + +- [#11445](https://github.com/emqx/emqx/pull/11445) Removed os_mon application monitor support on Windows platforms to prevent VM crashes. Functionality remains on non-Windows platforms. + +- [#11454](https://github.com/emqx/emqx/pull/11454) Fixed crashing when debugging/tracing with large payloads (introduced in [#11279](https://github.com/emqx/emqx/pull/11279)). + +- [#11456](https://github.com/emqx/emqx/pull/11456) Removed validation that enforced non-empty PEM for the CA cert file. The CA certificate file PEM can now be empty. + +- [#11499](https://github.com/emqx/emqx/pull/11499) Upgraded Erlang/OTP to 25.3.2-2. + + Erlang/OTP 25.3.2-2 excludes sensitive data from the mnesia_hook log message.
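Background for the fix-11522 entry and the `schema_name_bin_to_atom/1` guard shown earlier: Erlang atoms are capped at 255 characters, so converting an over-long schema name with `binary_to_atom/2` used to hit a hard VM limit instead of producing a readable error. A quick shell sketch illustrating this; the expected `system_limit` exception class is our reading of the VM's documented behavior:

```erlang
%% Names longer than 255 characters cannot become atoms; the new guard in
%% emqx_schema_registry throws a readable message before this point is reached.
LongName = binary:copy(<<"a">>, 256),
try binary_to_atom(LongName, utf8) of
    _Atom -> ok
catch
    error:system_limit ->
        io:format("schema name exceeds the 255-byte atom limit~n")
end.
```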
diff --git a/deploy/charts/emqx-enterprise/README.md b/deploy/charts/emqx-enterprise/README.md index b11159c84..d3977ceac 100644 --- a/deploy/charts/emqx-enterprise/README.md +++ b/deploy/charts/emqx-enterprise/README.md @@ -73,6 +73,7 @@ The following table lists the configurable parameters of the emqx chart and thei | `service.nodePorts.ws` | Kubernetes node port for WebSocket/HTTP. | nil | | `service.nodePorts.wss` | Kubernetes node port for WSS/HTTPS. | nil | | `service.nodePorts.dashboard` | Kubernetes node port for dashboard. | nil | +| `service.loadBalancerClass` | The load balancer implementation this Service belongs to | | | `service.loadBalancerIP` | loadBalancerIP for Service | nil | | `service.loadBalancerSourceRanges` | Address(es) that are allowed when service is LoadBalancer | [] | | `service.externalIPs` | ExternalIPs for the service | [] | diff --git a/deploy/charts/emqx-enterprise/templates/service.yaml b/deploy/charts/emqx-enterprise/templates/service.yaml index dea548653..525390a90 100644 --- a/deploy/charts/emqx-enterprise/templates/service.yaml +++ b/deploy/charts/emqx-enterprise/templates/service.yaml @@ -18,6 +18,9 @@ spec: externalTrafficPolicy: {{ .Values.service.externalTrafficPolicy | default "Cluster" }} {{- end }} {{- if eq .Values.service.type "LoadBalancer" }} + {{- if .Values.service.loadBalancerClass }} + loadBalancerClass: {{ .Values.service.loadBalancerClass }} + {{- end }} {{- if .Values.service.loadBalancerIP }} loadBalancerIP: {{ .Values.service.loadBalancerIP }} {{- end }} diff --git a/deploy/charts/emqx-enterprise/values.yaml b/deploy/charts/emqx-enterprise/values.yaml index 37fa56348..e830b81af 100644 --- a/deploy/charts/emqx-enterprise/values.yaml +++ b/deploy/charts/emqx-enterprise/values.yaml @@ -163,6 +163,10 @@ service: wss: dashboard: dashboardtls: + ## Specifies the load balancer implementation this Service belongs to. + ## Once set, it can not be changed. + ## + # loadBalancerClass: ## Set the LoadBalancer service type to internal only. ## ref: https://kubernetes.io/docs/concepts/services-networking/service/#internal-load-balancer ## @@ -245,7 +249,7 @@ ssl: useExisting: false existingName: emqx-tls dnsnames: [] - commonName: + commonName: issuer: name: letsencrypt-dns kind: ClusterIssuer diff --git a/deploy/charts/emqx/Chart.yaml b/deploy/charts/emqx/Chart.yaml index 1451347e2..f8cd69735 100644 --- a/deploy/charts/emqx/Chart.yaml +++ b/deploy/charts/emqx/Chart.yaml @@ -14,8 +14,8 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. -version: 5.1.5-build.3 +version: 5.1.6 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. -appVersion: 5.1.5-build.3 +appVersion: 5.1.6 diff --git a/deploy/charts/emqx/README.md b/deploy/charts/emqx/README.md index 0221f5114..d9e144f0c 100644 --- a/deploy/charts/emqx/README.md +++ b/deploy/charts/emqx/README.md @@ -74,6 +74,7 @@ The following table lists the configurable parameters of the emqx chart and thei | `service.nodePorts.ws` | Kubernetes node port for WebSocket/HTTP. | nil | | `service.nodePorts.wss` | Kubernetes node port for WSS/HTTPS. | nil | | `service.nodePorts.dashboard` | Kubernetes node port for dashboard. 
| nil | +| `service.loadBalancerClass` | The load balancer implementation this Service belongs to | | | `service.loadBalancerIP` | loadBalancerIP for Service | nil | | `service.loadBalancerSourceRanges` | Address(es) that are allowed when service is LoadBalancer | [] | | `service.externalIPs` | ExternalIPs for the service | [] | diff --git a/deploy/charts/emqx/templates/service.yaml b/deploy/charts/emqx/templates/service.yaml index dea548653..525390a90 100644 --- a/deploy/charts/emqx/templates/service.yaml +++ b/deploy/charts/emqx/templates/service.yaml @@ -18,6 +18,9 @@ spec: externalTrafficPolicy: {{ .Values.service.externalTrafficPolicy | default "Cluster" }} {{- end }} {{- if eq .Values.service.type "LoadBalancer" }} + {{- if .Values.service.loadBalancerClass }} + loadBalancerClass: {{ .Values.service.loadBalancerClass }} + {{- end }} {{- if .Values.service.loadBalancerIP }} loadBalancerIP: {{ .Values.service.loadBalancerIP }} {{- end }} diff --git a/deploy/charts/emqx/values.yaml b/deploy/charts/emqx/values.yaml index 791db5812..88cc6279f 100644 --- a/deploy/charts/emqx/values.yaml +++ b/deploy/charts/emqx/values.yaml @@ -163,6 +163,10 @@ service: wss: dashboard: dashboardtls: + ## Specifies the load balancer implementation this Service belongs to. + ## Once set, it can not be changed. + ## + # loadBalancerClass: ## Set the LoadBalancer service type to internal only. ## ref: https://kubernetes.io/docs/concepts/services-networking/service/#internal-load-balancer ## @@ -245,7 +249,7 @@ ssl: useExisting: false existingName: emqx-tls dnsnames: [] - commonName: + commonName: issuer: name: letsencrypt-dns kind: ClusterIssuer diff --git a/deploy/docker/Dockerfile b/deploy/docker/Dockerfile index 61a143cae..76ded75eb 100644 --- a/deploy/docker/Dockerfile +++ b/deploy/docker/Dockerfile @@ -1,4 +1,4 @@ -ARG BUILD_FROM=ghcr.io/emqx/emqx-builder/5.1-3:1.14.5-25.3.2-1-debian11 +ARG BUILD_FROM=ghcr.io/emqx/emqx-builder/5.1-4:1.14.5-25.3.2-2-debian11 ARG RUN_FROM=debian:11-slim FROM ${BUILD_FROM} AS builder diff --git a/dev b/dev index 20cd66569..7768fbcf6 100755 --- a/dev +++ b/dev @@ -43,7 +43,7 @@ OPTIONS: -c|--compile: Force recompile, otherwise starts with the already built libs in '_build/\$PROFILE/lib/'. -e|--ekka-epmd: Force to use ekka_epmd. - -n|--name: Node name, defaults to \$EMQX_NODE_NAME env. + -n|--name: Node name, defaults to \$EMQX_NODE__NAME or the \$EMQX_NODE_NAME env. ENVIRONMENT VARIABLES: @@ -63,7 +63,7 @@ export HOCON_ENV_OVERRIDE_PREFIX='EMQX_' export EMQX_LOG__FILE__DEFAULT__ENABLE='false' export EMQX_LOG__CONSOLE__ENABLE='true' SYSTEM="$(./scripts/get-distro.sh)" -EMQX_NODE_NAME="${EMQX_NODE_NAME:-emqx@127.0.0.1}" +EMQX_NODE_NAME="${EMQX_NODE__NAME:-${EMQX_NODE_NAME:-emqx@127.0.0.1}}" PROFILE="${PROFILE:-emqx}" FORCE_COMPILE=0 # Do not start using ekka epmd by default, so your IDE can connect to it @@ -158,7 +158,7 @@ export EMQX_LOG_DIR="$BASE_DIR/log" export EMQX_PLUGINS__INSTALL_DIR="${EMQX_PLUGINS__INSTALL_DIR:-$BASE_DIR/plugins}" CONFIGS_DIR="$EMQX_DATA_DIR/configs" # Use your cookie so your IDE can connect to it. 
-COOKIE="${EMQX_NODE__COOKIE:-${EMQX_NODE_COOKIE:-$(cat ~/.erlang.cookie || echo 'emqxsecretcookie')}}" +COOKIE="${EMQX_NODE__COOKIE:-${EMQX_NODE_COOKIE:-$(cat ~/.erlang.cookie 2>/dev/null || echo 'emqxsecretcookie')}}" mkdir -p "$EMQX_ETC_DIR" "$EMQX_DATA_DIR/patches" "$EMQX_DATA_DIR/plugins" "$EMQX_DATA_DIR/certs" "$EMQX_LOG_DIR" "$CONFIGS_DIR" if [ $EKKA_EPMD -eq 1 ]; then EPMD_ARGS='-start_epmd false -epmd_module ekka_epmd' diff --git a/mix.exs b/mix.exs index 7a9c0033a..29596c872 100644 --- a/mix.exs +++ b/mix.exs @@ -55,7 +55,7 @@ defmodule EMQXUmbrella.MixProject do {:cowboy, github: "emqx/cowboy", tag: "2.9.2", override: true}, {:esockd, github: "emqx/esockd", tag: "5.9.6", override: true}, {:rocksdb, github: "emqx/erlang-rocksdb", tag: "1.8.0-emqx-1", override: true}, - {:ekka, github: "emqx/ekka", tag: "0.15.10", override: true}, + {:ekka, github: "emqx/ekka", tag: "0.15.11", override: true}, {:gen_rpc, github: "emqx/gen_rpc", tag: "2.8.1", override: true}, {:grpc, github: "emqx/grpc-erl", tag: "0.6.8", override: true}, {:minirest, github: "emqx/minirest", tag: "1.3.11", override: true}, @@ -73,7 +73,7 @@ defmodule EMQXUmbrella.MixProject do {:getopt, "1.0.2", override: true}, {:snabbkaffe, github: "kafka4beam/snabbkaffe", tag: "1.0.8", override: true}, {:hocon, github: "emqx/hocon", tag: "0.39.16", override: true}, - {:emqx_http_lib, github: "emqx/emqx_http_lib", tag: "0.5.2", override: true}, + {:emqx_http_lib, github: "emqx/emqx_http_lib", tag: "0.5.3", override: true}, {:esasl, github: "emqx/esasl", tag: "0.2.0"}, {:jose, github: "potatosalad/erlang-jose", tag: "1.11.2"}, # in conflict by ehttpc and emqtt @@ -234,7 +234,7 @@ defmodule EMQXUmbrella.MixProject do [ {:hstreamdb_erl, github: "hstreamdb/hstreamdb_erl", tag: "0.4.5+v0.16.1"}, {:influxdb, github: "emqx/influxdb-client-erl", tag: "1.1.11", override: true}, - {:wolff, github: "kafka4beam/wolff", tag: "1.7.6"}, + {:wolff, github: "kafka4beam/wolff", tag: "1.7.7"}, {:kafka_protocol, github: "kafka4beam/kafka_protocol", tag: "4.1.3", override: true}, {:brod_gssapi, github: "kafka4beam/brod_gssapi", tag: "v0.1.0"}, {:brod, github: "kafka4beam/brod", tag: "3.16.8"}, @@ -820,7 +820,7 @@ defmodule EMQXUmbrella.MixProject do defp bcrypt_dep() do if enable_bcrypt?(), - do: [{:bcrypt, github: "emqx/erlang-bcrypt", tag: "0.6.0", override: true}], + do: [{:bcrypt, github: "emqx/erlang-bcrypt", tag: "0.6.1", override: true}], else: [] end diff --git a/rebar.config b/rebar.config index 4df8dea83..a8cc269e6 100644 --- a/rebar.config +++ b/rebar.config @@ -62,7 +62,7 @@ , {cowboy, {git, "https://github.com/emqx/cowboy", {tag, "2.9.2"}}} , {esockd, {git, "https://github.com/emqx/esockd", {tag, "5.9.6"}}} , {rocksdb, {git, "https://github.com/emqx/erlang-rocksdb", {tag, "1.8.0-emqx-1"}}} - , {ekka, {git, "https://github.com/emqx/ekka", {tag, "0.15.10"}}} + , {ekka, {git, "https://github.com/emqx/ekka", {tag, "0.15.11"}}} , {gen_rpc, {git, "https://github.com/emqx/gen_rpc", {tag, "2.8.1"}}} , {grpc, {git, "https://github.com/emqx/grpc-erl", {tag, "0.6.8"}}} , {minirest, {git, "https://github.com/emqx/minirest", {tag, "1.3.11"}}} @@ -76,7 +76,7 @@ , {getopt, "1.0.2"} , {snabbkaffe, {git, "https://github.com/kafka4beam/snabbkaffe.git", {tag, "1.0.8"}}} , {hocon, {git, "https://github.com/emqx/hocon.git", {tag, "0.39.16"}}} - , {emqx_http_lib, {git, "https://github.com/emqx/emqx_http_lib.git", {tag, "0.5.2"}}} + , {emqx_http_lib, {git, "https://github.com/emqx/emqx_http_lib.git", {tag, "0.5.3"}}} , {esasl, {git, 
"https://github.com/emqx/esasl", {tag, "0.2.0"}}} , {jose, {git, "https://github.com/potatosalad/erlang-jose", {tag, "1.11.2"}}} , {telemetry, "1.1.0"} diff --git a/rebar.config.erl b/rebar.config.erl index 8f26d11d8..3efdfe079 100644 --- a/rebar.config.erl +++ b/rebar.config.erl @@ -36,7 +36,7 @@ assert_otp() -> end. bcrypt() -> - {bcrypt, {git, "https://github.com/emqx/erlang-bcrypt.git", {tag, "0.6.0"}}}. + {bcrypt, {git, "https://github.com/emqx/erlang-bcrypt.git", {tag, "0.6.1"}}}. quicer() -> {quicer, {git, "https://github.com/emqx/quic.git", {tag, "0.0.114"}}}. diff --git a/rel/config/examples/listeners.quic.conf.example b/rel/config/examples/listeners.quic.conf.example index 52161e828..9ec2646a5 100644 --- a/rel/config/examples/listeners.quic.conf.example +++ b/rel/config/examples/listeners.quic.conf.example @@ -5,7 +5,7 @@ listeners.quic.my_quick_listener_name { bind = 14567 ## or with an IP, e.g. "127.0.0.1:14567" ## When publishing or subscribing, prefix all topics with a mountpoint string - mountpoint = "${clientid}/msg" + ## mountpoint = "${clientid}/msg" ## Client authentication ## Type: diff --git a/rel/config/examples/listeners.tcp.conf.example b/rel/config/examples/listeners.tcp.conf.example index f03d98cc2..7f4dcdfd7 100644 --- a/rel/config/examples/listeners.tcp.conf.example +++ b/rel/config/examples/listeners.tcp.conf.example @@ -11,7 +11,7 @@ listeners.tcp.my_tcp_listener_name { proxy_protocol_timeout = 8 ## When publishing or subscribing, prefix all topics with a mountpoint string - mountpoint = "mqtt" ## Do not set this unless you know what is it for + ## mountpoint = "mqtt" ## Do not set this unless you know what is it for ## Client authentication ## Type: diff --git a/scripts/buildx.sh b/scripts/buildx.sh index 462ab6612..662a7233c 100755 --- a/scripts/buildx.sh +++ b/scripts/buildx.sh @@ -9,7 +9,7 @@ ## example: ## ./scripts/buildx.sh --profile emqx --pkgtype tgz --arch arm64 \ -## --builder ghcr.io/emqx/emqx-builder/5.1-3:1.14.5-25.3.2-1-debian11 +## --builder ghcr.io/emqx/emqx-builder/5.1-4:1.14.5-25.3.2-2-debian11 set -euo pipefail @@ -24,7 +24,7 @@ help() { echo "--arch amd64|arm64: Target arch to build the EMQX package for" echo "--src_dir : EMQX source code in this dir, default to PWD" echo "--builder : Builder image to pull" - echo " E.g. ghcr.io/emqx/emqx-builder/5.1-3:1.14.5-25.3.2-1-debian11" + echo " E.g. 
ghcr.io/emqx/emqx-builder/5.1-4:1.14.5-25.3.2-2-debian11" } die() { diff --git a/scripts/check_missing_reboot_apps.exs b/scripts/check_missing_reboot_apps.exs index d9933e099..91d4b39ea 100755 --- a/scripts/check_missing_reboot_apps.exs +++ b/scripts/check_missing_reboot_apps.exs @@ -24,10 +24,10 @@ apps = :xref.start(:xref) :xref.set_default(:xref, warnings: false) -rel_dir = '_build/#{profile}/lib/' +rel_dir = ~c"_build/#{profile}/lib/" :xref.add_release(:xref, rel_dir) -{:ok, calls} = :xref.q(:xref, '(App) (XC | [#{Enum.join(apps, ",")}] || mria:create_table/_)') +{:ok, calls} = :xref.q(:xref, ~c"(App) (XC | [#{Enum.join(apps, ",")}] || mria:create_table/_)") emqx_calls = calls diff --git a/scripts/find-suites.sh b/scripts/find-suites.sh index 685ab5ec8..47799f885 100755 --- a/scripts/find-suites.sh +++ b/scripts/find-suites.sh @@ -19,8 +19,14 @@ if [ -n "${EMQX_CT_SUITES:-}" ]; then fi TESTDIR="$1/test" +INTEGRATION_TESTDIR="$1/integration_test" # Get the output of the find command IFS=$'\n' read -r -d '' -a FILES < <(find "${TESTDIR}" -name "*_SUITE.erl" 2>/dev/null | sort && printf '\0') +if [[ -d "${INTEGRATION_TESTDIR}" ]]; then + IFS=$'\n' read -r -d '' -a FILES_INTEGRATION < <(find "${INTEGRATION_TESTDIR}" -name "*_SUITE.erl" 2>/dev/null | sort && printf '\0') +fi +# shellcheck disable=SC2206 +FILES+=(${FILES_INTEGRATION:-}) SUITEGROUP_RAW="${SUITEGROUP:-1_1}" SUITEGROUP="$(echo "$SUITEGROUP_RAW" | cut -d '_' -f1)" diff --git a/scripts/pr-sanity-checks.sh b/scripts/pr-sanity-checks.sh index 6b193b74e..19321230b 100755 --- a/scripts/pr-sanity-checks.sh +++ b/scripts/pr-sanity-checks.sh @@ -12,8 +12,8 @@ if ! type "yq" > /dev/null; then exit 1 fi -EMQX_BUILDER_VERSION=${EMQX_BUILDER_VERSION:-5.1-3} -EMQX_BUILDER_OTP=${EMQX_BUILDER_OTP:-25.3.2-1} +EMQX_BUILDER_VERSION=${EMQX_BUILDER_VERSION:-5.1-4} +EMQX_BUILDER_OTP=${EMQX_BUILDER_OTP:-25.3.2-2} EMQX_BUILDER_ELIXIR=${EMQX_BUILDER_ELIXIR:-1.14.5} EMQX_BUILDER_PLATFORM=${EMQX_BUILDER_PLATFORM:-ubuntu22.04} EMQX_BUILDER=${EMQX_BUILDER:-ghcr.io/emqx/emqx-builder/${EMQX_BUILDER_VERSION}:${EMQX_BUILDER_ELIXIR}-${EMQX_BUILDER_OTP}-${EMQX_BUILDER_PLATFORM}} diff --git a/scripts/relup-test/start-relup-test-cluster.sh b/scripts/relup-test/start-relup-test-cluster.sh index 9cc0eaffe..2cee1394e 100755 --- a/scripts/relup-test/start-relup-test-cluster.sh +++ b/scripts/relup-test/start-relup-test-cluster.sh @@ -22,7 +22,7 @@ WEBHOOK="webhook.$NET" BENCH="bench.$NET" COOKIE='this-is-a-secret' ## Erlang image is needed to run webhook server and emqtt-bench -ERLANG_IMAGE="ghcr.io/emqx/emqx-builder/5.1-3:1.14.5-25.3.2-1-ubuntu20.04" +ERLANG_IMAGE="ghcr.io/emqx/emqx-builder/5.1-4:1.14.5-25.3.2-2-ubuntu20.04" # builder has emqtt-bench installed BENCH_IMAGE="$ERLANG_IMAGE"