diff --git a/.gitignore b/.gitignore index 4e53f8f05..be08dee97 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ lib -ebin/* +ebin *.log + diff --git a/LICENSE b/LICENSE new file mode 100644 index 000000000..658a64435 --- /dev/null +++ b/LICENSE @@ -0,0 +1,15 @@ +The emqtt server is licensed under the MPL. + +The files below copied from rabbitmq and licensed under LICENSE-MPL-RabbitMQ: + +credit_flow.erl +file_handle_cache.erl +gen_server2.erl +priority_queue.erl +supervisor2.erl +tcp_acceptor.erl +tcp_acceptor_sup.erl +tcp_listener.erl +tcp_listener_sup.erl + +If you have any questions regarding licensing, please contact ery.lee@gmail.com diff --git a/LICENSE-MPL-RabbitMQ b/LICENSE-MPL-RabbitMQ new file mode 100644 index 000000000..d50e32efc --- /dev/null +++ b/LICENSE-MPL-RabbitMQ @@ -0,0 +1,455 @@ + MOZILLA PUBLIC LICENSE + Version 1.1 + + --------------- + +1. Definitions. + + 1.0.1. "Commercial Use" means distribution or otherwise making the + Covered Code available to a third party. + + 1.1. "Contributor" means each entity that creates or contributes to + the creation of Modifications. + + 1.2. "Contributor Version" means the combination of the Original + Code, prior Modifications used by a Contributor, and the Modifications + made by that particular Contributor. + + 1.3. "Covered Code" means the Original Code or Modifications or the + combination of the Original Code and Modifications, in each case + including portions thereof. + + 1.4. "Electronic Distribution Mechanism" means a mechanism generally + accepted in the software development community for the electronic + transfer of data. + + 1.5. "Executable" means Covered Code in any form other than Source + Code. + + 1.6. "Initial Developer" means the individual or entity identified + as the Initial Developer in the Source Code notice required by Exhibit + A. + + 1.7. "Larger Work" means a work which combines Covered Code or + portions thereof with code not governed by the terms of this License. + + 1.8. "License" means this document. + + 1.8.1. "Licensable" means having the right to grant, to the maximum + extent possible, whether at the time of the initial grant or + subsequently acquired, any and all of the rights conveyed herein. + + 1.9. "Modifications" means any addition to or deletion from the + substance or structure of either the Original Code or any previous + Modifications. When Covered Code is released as a series of files, a + Modification is: + A. Any addition to or deletion from the contents of a file + containing Original Code or previous Modifications. + + B. Any new file that contains any part of the Original Code or + previous Modifications. + + 1.10. "Original Code" means Source Code of computer software code + which is described in the Source Code notice required by Exhibit A as + Original Code, and which, at the time of its release under this + License is not already Covered Code governed by this License. + + 1.10.1. "Patent Claims" means any patent claim(s), now owned or + hereafter acquired, including without limitation, method, process, + and apparatus claims, in any patent Licensable by grantor. + + 1.11. "Source Code" means the preferred form of the Covered Code for + making modifications to it, including all modules it contains, plus + any associated interface definition files, scripts used to control + compilation and installation of an Executable, or source code + differential comparisons against either the Original Code or another + well known, available Covered Code of the Contributor's choice. The + Source Code can be in a compressed or archival form, provided the + appropriate decompression or de-archiving software is widely available + for no charge. + + 1.12. "You" (or "Your") means an individual or a legal entity + exercising rights under, and complying with all of the terms of, this + License or a future version of this License issued under Section 6.1. + For legal entities, "You" includes any entity which controls, is + controlled by, or is under common control with You. For purposes of + this definition, "control" means (a) the power, direct or indirect, + to cause the direction or management of such entity, whether by + contract or otherwise, or (b) ownership of more than fifty percent + (50%) of the outstanding shares or beneficial ownership of such + entity. + +2. Source Code License. + + 2.1. The Initial Developer Grant. + The Initial Developer hereby grants You a world-wide, royalty-free, + non-exclusive license, subject to third party intellectual property + claims: + (a) under intellectual property rights (other than patent or + trademark) Licensable by Initial Developer to use, reproduce, + modify, display, perform, sublicense and distribute the Original + Code (or portions thereof) with or without Modifications, and/or + as part of a Larger Work; and + + (b) under Patents Claims infringed by the making, using or + selling of Original Code, to make, have made, use, practice, + sell, and offer for sale, and/or otherwise dispose of the + Original Code (or portions thereof). + + (c) the licenses granted in this Section 2.1(a) and (b) are + effective on the date Initial Developer first distributes + Original Code under the terms of this License. + + (d) Notwithstanding Section 2.1(b) above, no patent license is + granted: 1) for code that You delete from the Original Code; 2) + separate from the Original Code; or 3) for infringements caused + by: i) the modification of the Original Code or ii) the + combination of the Original Code with other software or devices. + + 2.2. Contributor Grant. + Subject to third party intellectual property claims, each Contributor + hereby grants You a world-wide, royalty-free, non-exclusive license + + (a) under intellectual property rights (other than patent or + trademark) Licensable by Contributor, to use, reproduce, modify, + display, perform, sublicense and distribute the Modifications + created by such Contributor (or portions thereof) either on an + unmodified basis, with other Modifications, as Covered Code + and/or as part of a Larger Work; and + + (b) under Patent Claims infringed by the making, using, or + selling of Modifications made by that Contributor either alone + and/or in combination with its Contributor Version (or portions + of such combination), to make, use, sell, offer for sale, have + made, and/or otherwise dispose of: 1) Modifications made by that + Contributor (or portions thereof); and 2) the combination of + Modifications made by that Contributor with its Contributor + Version (or portions of such combination). + + (c) the licenses granted in Sections 2.2(a) and 2.2(b) are + effective on the date Contributor first makes Commercial Use of + the Covered Code. + + (d) Notwithstanding Section 2.2(b) above, no patent license is + granted: 1) for any code that Contributor has deleted from the + Contributor Version; 2) separate from the Contributor Version; + 3) for infringements caused by: i) third party modifications of + Contributor Version or ii) the combination of Modifications made + by that Contributor with other software (except as part of the + Contributor Version) or other devices; or 4) under Patent Claims + infringed by Covered Code in the absence of Modifications made by + that Contributor. + +3. Distribution Obligations. + + 3.1. Application of License. + The Modifications which You create or to which You contribute are + governed by the terms of this License, including without limitation + Section 2.2. The Source Code version of Covered Code may be + distributed only under the terms of this License or a future version + of this License released under Section 6.1, and You must include a + copy of this License with every copy of the Source Code You + distribute. You may not offer or impose any terms on any Source Code + version that alters or restricts the applicable version of this + License or the recipients' rights hereunder. However, You may include + an additional document offering the additional rights described in + Section 3.5. + + 3.2. Availability of Source Code. + Any Modification which You create or to which You contribute must be + made available in Source Code form under the terms of this License + either on the same media as an Executable version or via an accepted + Electronic Distribution Mechanism to anyone to whom you made an + Executable version available; and if made available via Electronic + Distribution Mechanism, must remain available for at least twelve (12) + months after the date it initially became available, or at least six + (6) months after a subsequent version of that particular Modification + has been made available to such recipients. You are responsible for + ensuring that the Source Code version remains available even if the + Electronic Distribution Mechanism is maintained by a third party. + + 3.3. Description of Modifications. + You must cause all Covered Code to which You contribute to contain a + file documenting the changes You made to create that Covered Code and + the date of any change. You must include a prominent statement that + the Modification is derived, directly or indirectly, from Original + Code provided by the Initial Developer and including the name of the + Initial Developer in (a) the Source Code, and (b) in any notice in an + Executable version or related documentation in which You describe the + origin or ownership of the Covered Code. + + 3.4. Intellectual Property Matters + (a) Third Party Claims. + If Contributor has knowledge that a license under a third party's + intellectual property rights is required to exercise the rights + granted by such Contributor under Sections 2.1 or 2.2, + Contributor must include a text file with the Source Code + distribution titled "LEGAL" which describes the claim and the + party making the claim in sufficient detail that a recipient will + know whom to contact. If Contributor obtains such knowledge after + the Modification is made available as described in Section 3.2, + Contributor shall promptly modify the LEGAL file in all copies + Contributor makes available thereafter and shall take other steps + (such as notifying appropriate mailing lists or newsgroups) + reasonably calculated to inform those who received the Covered + Code that new knowledge has been obtained. + + (b) Contributor APIs. + If Contributor's Modifications include an application programming + interface and Contributor has knowledge of patent licenses which + are reasonably necessary to implement that API, Contributor must + also include this information in the LEGAL file. + + (c) Representations. + Contributor represents that, except as disclosed pursuant to + Section 3.4(a) above, Contributor believes that Contributor's + Modifications are Contributor's original creation(s) and/or + Contributor has sufficient rights to grant the rights conveyed by + this License. + + 3.5. Required Notices. + You must duplicate the notice in Exhibit A in each file of the Source + Code. If it is not possible to put such notice in a particular Source + Code file due to its structure, then You must include such notice in a + location (such as a relevant directory) where a user would be likely + to look for such a notice. If You created one or more Modification(s) + You may add your name as a Contributor to the notice described in + Exhibit A. You must also duplicate this License in any documentation + for the Source Code where You describe recipients' rights or ownership + rights relating to Covered Code. You may choose to offer, and to + charge a fee for, warranty, support, indemnity or liability + obligations to one or more recipients of Covered Code. However, You + may do so only on Your own behalf, and not on behalf of the Initial + Developer or any Contributor. You must make it absolutely clear than + any such warranty, support, indemnity or liability obligation is + offered by You alone, and You hereby agree to indemnify the Initial + Developer and every Contributor for any liability incurred by the + Initial Developer or such Contributor as a result of warranty, + support, indemnity or liability terms You offer. + + 3.6. Distribution of Executable Versions. + You may distribute Covered Code in Executable form only if the + requirements of Section 3.1-3.5 have been met for that Covered Code, + and if You include a notice stating that the Source Code version of + the Covered Code is available under the terms of this License, + including a description of how and where You have fulfilled the + obligations of Section 3.2. The notice must be conspicuously included + in any notice in an Executable version, related documentation or + collateral in which You describe recipients' rights relating to the + Covered Code. You may distribute the Executable version of Covered + Code or ownership rights under a license of Your choice, which may + contain terms different from this License, provided that You are in + compliance with the terms of this License and that the license for the + Executable version does not attempt to limit or alter the recipient's + rights in the Source Code version from the rights set forth in this + License. If You distribute the Executable version under a different + license You must make it absolutely clear that any terms which differ + from this License are offered by You alone, not by the Initial + Developer or any Contributor. You hereby agree to indemnify the + Initial Developer and every Contributor for any liability incurred by + the Initial Developer or such Contributor as a result of any such + terms You offer. + + 3.7. Larger Works. + You may create a Larger Work by combining Covered Code with other code + not governed by the terms of this License and distribute the Larger + Work as a single product. In such a case, You must make sure the + requirements of this License are fulfilled for the Covered Code. + +4. Inability to Comply Due to Statute or Regulation. + + If it is impossible for You to comply with any of the terms of this + License with respect to some or all of the Covered Code due to + statute, judicial order, or regulation then You must: (a) comply with + the terms of this License to the maximum extent possible; and (b) + describe the limitations and the code they affect. Such description + must be included in the LEGAL file described in Section 3.4 and must + be included with all distributions of the Source Code. Except to the + extent prohibited by statute or regulation, such description must be + sufficiently detailed for a recipient of ordinary skill to be able to + understand it. + +5. Application of this License. + + This License applies to code to which the Initial Developer has + attached the notice in Exhibit A and to related Covered Code. + +6. Versions of the License. + + 6.1. New Versions. + Netscape Communications Corporation ("Netscape") may publish revised + and/or new versions of the License from time to time. Each version + will be given a distinguishing version number. + + 6.2. Effect of New Versions. + Once Covered Code has been published under a particular version of the + License, You may always continue to use it under the terms of that + version. You may also choose to use such Covered Code under the terms + of any subsequent version of the License published by Netscape. No one + other than Netscape has the right to modify the terms applicable to + Covered Code created under this License. + + 6.3. Derivative Works. + If You create or use a modified version of this License (which you may + only do in order to apply it to code which is not already Covered Code + governed by this License), You must (a) rename Your license so that + the phrases "Mozilla", "MOZILLAPL", "MOZPL", "Netscape", + "MPL", "NPL" or any confusingly similar phrase do not appear in your + license (except to note that your license differs from this License) + and (b) otherwise make it clear that Your version of the license + contains terms which differ from the Mozilla Public License and + Netscape Public License. (Filling in the name of the Initial + Developer, Original Code or Contributor in the notice described in + Exhibit A shall not of themselves be deemed to be modifications of + this License.) + +7. DISCLAIMER OF WARRANTY. + + COVERED CODE IS PROVIDED UNDER THIS LICENSE ON AN "AS IS" BASIS, + WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, + WITHOUT LIMITATION, WARRANTIES THAT THE COVERED CODE IS FREE OF + DEFECTS, MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE OR NON-INFRINGING. + THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE COVERED CODE + IS WITH YOU. SHOULD ANY COVERED CODE PROVE DEFECTIVE IN ANY RESPECT, + YOU (NOT THE INITIAL DEVELOPER OR ANY OTHER CONTRIBUTOR) ASSUME THE + COST OF ANY NECESSARY SERVICING, REPAIR OR CORRECTION. THIS DISCLAIMER + OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS LICENSE. NO USE OF + ANY COVERED CODE IS AUTHORIZED HEREUNDER EXCEPT UNDER THIS DISCLAIMER. + +8. TERMINATION. + + 8.1. This License and the rights granted hereunder will terminate + automatically if You fail to comply with terms herein and fail to cure + such breach within 30 days of becoming aware of the breach. All + sublicenses to the Covered Code which are properly granted shall + survive any termination of this License. Provisions which, by their + nature, must remain in effect beyond the termination of this License + shall survive. + + 8.2. If You initiate litigation by asserting a patent infringement + claim (excluding declatory judgment actions) against Initial Developer + or a Contributor (the Initial Developer or Contributor against whom + You file such action is referred to as "Participant") alleging that: + + (a) such Participant's Contributor Version directly or indirectly + infringes any patent, then any and all rights granted by such + Participant to You under Sections 2.1 and/or 2.2 of this License + shall, upon 60 days notice from Participant terminate prospectively, + unless if within 60 days after receipt of notice You either: (i) + agree in writing to pay Participant a mutually agreeable reasonable + royalty for Your past and future use of Modifications made by such + Participant, or (ii) withdraw Your litigation claim with respect to + the Contributor Version against such Participant. If within 60 days + of notice, a reasonable royalty and payment arrangement are not + mutually agreed upon in writing by the parties or the litigation claim + is not withdrawn, the rights granted by Participant to You under + Sections 2.1 and/or 2.2 automatically terminate at the expiration of + the 60 day notice period specified above. + + (b) any software, hardware, or device, other than such Participant's + Contributor Version, directly or indirectly infringes any patent, then + any rights granted to You by such Participant under Sections 2.1(b) + and 2.2(b) are revoked effective as of the date You first made, used, + sold, distributed, or had made, Modifications made by that + Participant. + + 8.3. If You assert a patent infringement claim against Participant + alleging that such Participant's Contributor Version directly or + indirectly infringes any patent where such claim is resolved (such as + by license or settlement) prior to the initiation of patent + infringement litigation, then the reasonable value of the licenses + granted by such Participant under Sections 2.1 or 2.2 shall be taken + into account in determining the amount or value of any payment or + license. + + 8.4. In the event of termination under Sections 8.1 or 8.2 above, + all end user license agreements (excluding distributors and resellers) + which have been validly granted by You or any distributor hereunder + prior to termination shall survive termination. + +9. LIMITATION OF LIABILITY. + + UNDER NO CIRCUMSTANCES AND UNDER NO LEGAL THEORY, WHETHER TORT + (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE, SHALL YOU, THE INITIAL + DEVELOPER, ANY OTHER CONTRIBUTOR, OR ANY DISTRIBUTOR OF COVERED CODE, + OR ANY SUPPLIER OF ANY OF SUCH PARTIES, BE LIABLE TO ANY PERSON FOR + ANY INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES OF ANY + CHARACTER INCLUDING, WITHOUT LIMITATION, DAMAGES FOR LOSS OF GOODWILL, + WORK STOPPAGE, COMPUTER FAILURE OR MALFUNCTION, OR ANY AND ALL OTHER + COMMERCIAL DAMAGES OR LOSSES, EVEN IF SUCH PARTY SHALL HAVE BEEN + INFORMED OF THE POSSIBILITY OF SUCH DAMAGES. THIS LIMITATION OF + LIABILITY SHALL NOT APPLY TO LIABILITY FOR DEATH OR PERSONAL INJURY + RESULTING FROM SUCH PARTY'S NEGLIGENCE TO THE EXTENT APPLICABLE LAW + PROHIBITS SUCH LIMITATION. SOME JURISDICTIONS DO NOT ALLOW THE + EXCLUSION OR LIMITATION OF INCIDENTAL OR CONSEQUENTIAL DAMAGES, SO + THIS EXCLUSION AND LIMITATION MAY NOT APPLY TO YOU. + +10. U.S. GOVERNMENT END USERS. + + The Covered Code is a "commercial item," as that term is defined in + 48 C.F.R. 2.101 (Oct. 1995), consisting of "commercial computer + software" and "commercial computer software documentation," as such + terms are used in 48 C.F.R. 12.212 (Sept. 1995). Consistent with 48 + C.F.R. 12.212 and 48 C.F.R. 227.7202-1 through 227.7202-4 (June 1995), + all U.S. Government End Users acquire Covered Code with only those + rights set forth herein. + +11. MISCELLANEOUS. + + This License represents the complete agreement concerning subject + matter hereof. If any provision of this License is held to be + unenforceable, such provision shall be reformed only to the extent + necessary to make it enforceable. This License shall be governed by + California law provisions (except to the extent applicable law, if + any, provides otherwise), excluding its conflict-of-law provisions. + With respect to disputes in which at least one party is a citizen of, + or an entity chartered or registered to do business in the United + States of America, any litigation relating to this License shall be + subject to the jurisdiction of the Federal Courts of the Northern + District of California, with venue lying in Santa Clara County, + California, with the losing party responsible for costs, including + without limitation, court costs and reasonable attorneys' fees and + expenses. The application of the United Nations Convention on + Contracts for the International Sale of Goods is expressly excluded. + Any law or regulation which provides that the language of a contract + shall be construed against the drafter shall not apply to this + License. + +12. RESPONSIBILITY FOR CLAIMS. + + As between Initial Developer and the Contributors, each party is + responsible for claims and damages arising, directly or indirectly, + out of its utilization of rights under this License and You agree to + work with Initial Developer and Contributors to distribute such + responsibility on an equitable basis. Nothing herein is intended or + shall be deemed to constitute any admission of liability. + +13. MULTIPLE-LICENSED CODE. + + Initial Developer may designate portions of the Covered Code as + "Multiple-Licensed". "Multiple-Licensed" means that the Initial + Developer permits you to utilize portions of the Covered Code under + Your choice of the NPL or the alternative licenses, if any, specified + by the Initial Developer in the file described in Exhibit A. + +EXHIBIT A -Mozilla Public License. + + ``The contents of this file are subject to the Mozilla Public License + Version 1.1 (the "License"); you may not use this file except in + compliance with the License. You may obtain a copy of the License at + http://www.mozilla.org/MPL/ + + Software distributed under the License is distributed on an "AS IS" + basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the + License for the specific language governing rights and limitations + under the License. + + The Original Code is RabbitMQ. + + The Initial Developer of the Original Code is VMware, Inc. + Copyright (c) 2007-2012 VMware, Inc. All rights reserved.'' + + [NOTE: The text of this Exhibit A may differ slightly from the text of + the notices in the Source Code files of the Original Code. You should + use the text of this Exhibit A rather than the text found in the + Original Code Source Code for Your Modifications.] diff --git a/Makefile b/Makefile index 1b8b44d39..1b95d6333 100644 --- a/Makefile +++ b/Makefile @@ -1,10 +1,10 @@ all: compile -run: compile - erl -pa ebin -pa lib/rabbitlib/ebin -config etc/emqtt.config -s emqtt_app start - compile: deps - rebar compile + ./rebar compile deps: - rebar get-deps + ./rebar get-deps + +clean: + ./rebar clean diff --git a/README.md b/README.md index 4bb7d9c0c..e8004f512 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,32 @@ emqtt ===== -erlang mqtt broker \ No newline at end of file +erlang mqtt broker based on rabbitmq network layer. + +compile +======= + +require erlang R15B+ and git client + +make + +start +====== + +./bin/emqtt console + +./bin/emqtt start + +stop +==== + +./bin/emqtt stop + +status +====== +./bin/emqtt_ctl status + +logs +==== + +log/* diff --git a/TODO b/TODO index 9b74fc1b7..da5e437ef 100644 --- a/TODO +++ b/TODO @@ -1,2 +1,2 @@ -1. Topic Trie -2. MQTT frame parse +OK 1. Topic Trie +OK 2. MQTT frame parse diff --git a/bin/emqtt b/bin/emqtt new file mode 100755 index 000000000..d71aa4f10 --- /dev/null +++ b/bin/emqtt @@ -0,0 +1,179 @@ +#!/bin/bash +# -*- tab-width:4;indent-tabs-mode:nil -*- +# ex: ts=4 sw=4 et + +RUNNER_SCRIPT_DIR=$(cd ${0%/*} && pwd) + +RUNNER_BASE_DIR=${RUNNER_SCRIPT_DIR%/*} +RUNNER_ETC_DIR=$RUNNER_BASE_DIR/etc +RUNNER_LOG_DIR=$RUNNER_BASE_DIR/log +# Note the trailing slash on $PIPE_DIR/ +PIPE_DIR=/tmp/$RUNNER_BASE_DIR/ +RUNNER_USER= + +# Make sure this script is running as the appropriate user +if [ ! -z "$RUNNER_USER" ] && [ `whoami` != "$RUNNER_USER" ]; then + exec sudo -u $RUNNER_USER -i $0 $@ +fi + +# Make sure CWD is set to runner base dir +cd $RUNNER_BASE_DIR + +# Make sure log directory exists +mkdir -p $RUNNER_LOG_DIR +# Identify the script name +SCRIPT=`basename $0` + +# Parse out release and erts info +ERLANG_BASE_DIR=/usr/local/lib/erlang +START_ERL=`cat $ERLANG_BASE_DIR/releases/start_erl.data` +ERTS_VSN=${START_ERL% *} +APP_VSN=${START_ERL#* } + +VMARGS_PATH="$RUNNER_ETC_DIR/emqtt.args" + +CONFIG_PATH="$RUNNER_ETC_DIR/emqtt.config" + +# Extract the target node name from node.args +NAME_ARG=`egrep '^-s?name' $VMARGS_PATH` +if [ -z "$NAME_ARG" ]; then + echo "emqtt.args needs to have either -name or -sname parameter." + exit 1 +fi + +# Extract the target cookie +COOKIE_ARG=`grep '^-setcookie' $VMARGS_PATH` +if [ -z "$COOKIE_ARG" ]; then + echo "emqtt.args needs to have a -setcookie parameter." + exit 1 +fi + +# Add ERTS bin dir to our path +ERTS_PATH=$ERLANG_BASE_DIR/erts-$ERTS_VSN/bin + +# Setup command to control the node +NODETOOL="escript $RUNNER_BASE_DIR/bin/nodetool $NAME_ARG $COOKIE_ARG" + +# Check the first argument for instructions +case "$1" in + start) + # Make sure there is not already a node running + RES=`$NODETOOL ping` + if [ "$RES" = "pong" ]; then + echo "Node is already running!" + exit 1 + fi + HEART_COMMAND="$RUNNER_BASE_DIR/bin/$SCRIPT start" + export HEART_COMMAND + mkdir -p $PIPE_DIR + shift # remove $1 + $ERTS_PATH/run_erl -daemon $PIPE_DIR $RUNNER_LOG_DIR "exec $RUNNER_BASE_DIR/bin/$SCRIPT console $@" 2>&1 + ;; + + stop) + # Wait for the node to completely stop... + case `uname -s` in + Linux|Darwin|FreeBSD|DragonFly|NetBSD|OpenBSD) + # PID COMMAND + PID=`ps ax -o pid= -o command=|\ + grep "$RUNNER_BASE_DIR/.*/[b]eam"|awk '{print $1}'` + ;; + SunOS) + # PID COMMAND + PID=`ps -ef -o pid= -o args=|\ + grep "$RUNNER_BASE_DIR/.*/[b]eam"|awk '{print $1}'` + ;; + CYGWIN*) + # UID PID PPID TTY STIME COMMAND + PID=`ps -efW|grep "$RUNNER_BASE_DIR/.*/[b]eam"|awk '{print $2}'` + ;; + esac + $NODETOOL stop + ES=$? + if [ "$ES" -ne 0 ]; then + exit $ES + fi + while `kill -0 $PID 2>/dev/null`; + do + sleep 1 + done + ;; + + restart) + ## Restart the VM without exiting the process + $NODETOOL restart + ES=$? + if [ "$ES" -ne 0 ]; then + exit $ES + fi + ;; + + reboot) + ## Restart the VM completely (uses heart to restart it) + $NODETOOL reboot + ES=$? + if [ "$ES" -ne 0 ]; then + exit $ES + fi + ;; + + ping) + ## See if the VM is alive + $NODETOOL ping + ES=$? + if [ "$ES" -ne 0 ]; then + exit $ES + fi + ;; + + attach) + # Make sure a node IS running + RES=`$NODETOOL ping` + ES=$? + if [ "$ES" -ne 0 ]; then + echo "Node is not running!" + exit $ES + fi + + shift + exec $ERTS_PATH/to_erl $PIPE_DIR + ;; + + console) + # .boot file typically just $SCRIPT (ie, the app name) + # however, for debugging, sometimes start_clean.boot is useful: + #case "$1" in + # console) BOOTFILE=$SCRIPT ;; + # console_clean) BOOTFILE=start_clean ;; + #esac + # Setup beam-required vars + ERL_LIBS=$RUNNER_BASE_DIR/lib + ROOTDIR=$RUNNER_BASE_DIR + BINDIR=$ERTS_PATH + EMU=beam + PROGNAME=`echo $0 | sed 's/.*\\///'` + CMD="$ERTS_PATH/erl -pa $RUNNER_BASE_DIR/ebin -config $CONFIG_PATH -args_file $VMARGS_PATH -- ${1+"$@"}" + export ERL_LIBS + export EMU + export ROOTDIR + export BINDIR + export PROGNAME + + # Dump environment info for logging purposes + echo "Exec: $CMD" + echo "Root: $ROOTDIR" + + # Log the startup + logger -t "$SCRIPT[$$]" "Starting up" + + # Start the VM + exec $CMD + ;; + + *) + echo "Usage: $SCRIPT {start|stop|restart|reboot|ping|console||attach}" + exit 1 + ;; +esac + +exit 0 diff --git a/bin/emqtt_ctl b/bin/emqtt_ctl new file mode 100755 index 000000000..edd24c2dc --- /dev/null +++ b/bin/emqtt_ctl @@ -0,0 +1,123 @@ +#!/bin/bash + +RUNNER_SCRIPT_DIR=$(cd ${0%/*} && pwd) + +RUNNER_BASE_DIR=${RUNNER_SCRIPT_DIR%/*} +RUNNER_ETC_DIR=$RUNNER_BASE_DIR/etc +RUNNER_BIN_DIR=$RUNNER_BASE_DIR/bin +RUNNER_LOG_DIR=$RUNNER_BASE_DIR/log + +RUNNER_EBIN_DIR=$RUNNER_BASE_DIR/ebin +RUNNER_USER= + +# Make sure CWD is set to runner base dir +cd $RUNNER_BASE_DIR + +# Extract the target node name from node.args +NAME_ARG=`grep '\-[s]*name' $RUNNER_ETC_DIR/emqtt.args` +if [ -z "$NAME_ARG" ]; then + echo "emqtt.args needs to have either -name or -sname parameter." + exit 1 +fi + +# Learn how to specify node name for connection from remote nodes +echo "$NAME_ARG" | grep '^-sname' > /dev/null 2>&1 +if [ "X$?" = "X0" ]; then + NAME_PARAM="-sname" + NAME_HOST="" +else + NAME_PARAM="-name" + echo "$NAME_ARG" | grep '@.*' > /dev/null 2>&1 + if [ "X$?" = "X0" ]; then + NAME_HOST=`echo "${NAME_ARG}" | sed -e 's/.*\(@.*\)$/\1/'` + else + NAME_HOST="" + fi +fi + +# Extract the target cookie +COOKIE_ARG=`grep '\-setcookie' $RUNNER_ETC_DIR/emqtt.args` +if [ -z "$COOKIE_ARG" ]; then + echo "emqtt.args needs to have a -setcookie parameter." + exit 1 +fi + +# Identify the script name +SCRIPT=`basename $0` + +# Parse out release and erts info +ERLANG_BASE_DIR=/usr/local/lib/erlang +START_ERL=`cat $ERLANG_BASE_DIR/releases/start_erl.data` +ERTS_VSN=${START_ERL% *} +APP_VSN=${START_ERL#* } + +# Add ERTS bin dir to our path +ERTS_PATH=$ERLANG_BASE_DIR/erts-$ERTS_VSN/bin + +# Setup command to control the node +NODETOOL="$ERTS_PATH/escript $RUNNER_BIN_DIR/nodetool $NAME_ARG $COOKIE_ARG" + +# Check the first argument for instructions +case "$1" in + status) + if [ $# -ne 1 ]; then + echo "Usage: $SCRIPT status" + exit 1 + fi + + # Make sure the local node IS running + RES=`$NODETOOL ping` + if [ "$RES" != "pong" ]; then + echo "Node is not running!" + exit 1 + fi + shift + + $NODETOOL rpc emqtt_ctl status $@ + ;; + + cluster_info) + if [ $# -ne 1 ]; then + echo "Usage: $SCRIPT cluster_info" + exit 1 + fi + + # Make sure the local node IS running + RES=`$NODETOOL ping` + if [ "$RES" != "pong" ]; then + echo "Node is not running!" + exit 1 + fi + shift + + $NODETOOL rpc emqtt_ctl cluster_info $@ + ;; + + + cluster) + if [ $# -ne 2 ]; then + echo "Usage: $SCRIPT cluster " + exit 1 + fi + + # Make sure the local node IS running + RES=`$NODETOOL ping` + if [ "$RES" != "pong" ]; then + echo "emqtt is not running!" + exit 1 + fi + shift + + $NODETOOL rpc emqtt_ctl cluster $@ + ;; + + *) + echo "Usage: $SCRIPT" + echo " status #query emqtt status" + echo " cluster_info #query cluster nodes" + echo " cluster #cluster node" + exit 1 + ;; + +esac + diff --git a/bin/nodetool b/bin/nodetool new file mode 100755 index 000000000..1030c9355 --- /dev/null +++ b/bin/nodetool @@ -0,0 +1,138 @@ +%% -*- mode: erlang;erlang-indent-level: 4;indent-tabs-mode: nil -*- +%% ex: ft=erlang ts=4 sw=4 et +%% ------------------------------------------------------------------- +%% +%% nodetool: Helper Script for interacting with live nodes +%% +%% ------------------------------------------------------------------- + +main(Args) -> + ok = start_epmd(), + %% Extract the args + {RestArgs, TargetNode} = process_args(Args, [], undefined), + + %% See if the node is currently running -- if it's not, we'll bail + case {net_kernel:hidden_connect_node(TargetNode), net_adm:ping(TargetNode)} of + {true, pong} -> + ok; + {_, pang} -> + io:format("Node ~p not responding to pings.\n", [TargetNode]), + halt(1) + end, + + case RestArgs of + ["ping"] -> + %% If we got this far, the node already responsed to a ping, so just dump + %% a "pong" + io:format("pong\n"); + ["stop"] -> + io:format("~p\n", [rpc:call(TargetNode, init, stop, [], 60000)]); + ["restart"] -> + io:format("~p\n", [rpc:call(TargetNode, init, restart, [], 60000)]); + ["reboot"] -> + io:format("~p\n", [rpc:call(TargetNode, init, reboot, [], 60000)]); + ["rpc", Module, Function | RpcArgs] -> + case rpc:call(TargetNode, list_to_atom(Module), list_to_atom(Function), + RpcArgs, 60000) of + ok -> + ok; + {badrpc, Reason} -> + io:format("RPC to ~p failed: ~p\n", [TargetNode, Reason]), + halt(1); + _ -> + halt(1) + end; + ["rpcterms", Module, Function, ArgsAsString] -> + case rpc:call(TargetNode, list_to_atom(Module), list_to_atom(Function), + consult(ArgsAsString), 60000) of + {badrpc, Reason} -> + io:format("RPC to ~p failed: ~p\n", [TargetNode, Reason]), + halt(1); + Other -> + io:format("~p\n", [Other]) + end; + Other -> + io:format("Other: ~p\n", [Other]), + io:format("Usage: nodetool {ping|stop|restart|reboot}\n") + end, + net_kernel:stop(). + +process_args([], Acc, TargetNode) -> + {lists:reverse(Acc), TargetNode}; +process_args(["-setcookie", Cookie | Rest], Acc, TargetNode) -> + erlang:set_cookie(node(), list_to_atom(Cookie)), + process_args(Rest, Acc, TargetNode); +process_args(["-name", TargetName | Rest], Acc, _) -> + ThisNode = append_node_suffix(TargetName, "_ctrl_"), + {ok, _} = net_kernel:start([ThisNode, longnames]), + process_args(Rest, Acc, nodename(TargetName)); +process_args(["-sname", TargetName | Rest], Acc, _) -> + ThisNode = append_node_suffix(TargetName, "_ctrl_"), + {ok, _} = net_kernel:start([ThisNode, shortnames]), + process_args(Rest, Acc, nodename(TargetName)); +process_args([Arg | Rest], Acc, Opts) -> + process_args(Rest, [Arg | Acc], Opts). + + +start_epmd() -> + [] = os:cmd(epmd_path() ++ " -daemon"), + ok. + +epmd_path() -> + ErtsBinDir = filename:dirname(escript:script_name()), + Name = "epmd", + case os:find_executable(Name, ErtsBinDir) of + false -> + case os:find_executable(Name) of + false -> + io:format("Could not find epmd.~n"), + halt(1); + GlobalEpmd -> + GlobalEpmd + end; + Epmd -> + Epmd + end. + + +nodename(Name) -> + case string:tokens(Name, "@") of + [_Node, _Host] -> + list_to_atom(Name); + [Node] -> + [_, Host] = string:tokens(atom_to_list(node()), "@"), + list_to_atom(lists:concat([Node, "@", Host])) + end. + +append_node_suffix(Name, Suffix) -> + case string:tokens(Name, "@") of + [Node, Host] -> + list_to_atom(lists:concat([Node, Suffix, os:getpid(), "@", Host])); + [Node] -> + list_to_atom(lists:concat([Node, Suffix, os:getpid()])) + end. + + +%% +%% Given a string or binary, parse it into a list of terms, ala file:consult/0 +%% +consult(Str) when is_list(Str) -> + consult([], Str, []); +consult(Bin) when is_binary(Bin)-> + consult([], binary_to_list(Bin), []). + +consult(Cont, Str, Acc) -> + case erl_scan:tokens(Cont, Str, 0) of + {done, Result, Remaining} -> + case Result of + {ok, Tokens, _} -> + {ok, Term} = erl_parse:parse_term(Tokens), + consult([], Remaining, [Term | Acc]); + {eof, _Other} -> + lists:reverse(Acc); + {error, Info, _} -> + {error, Info} + end; + {more, Cont1} -> + consult(Cont1, eof, Acc) + end. diff --git a/etc/emqtt.args b/etc/emqtt.args new file mode 100644 index 000000000..e564c943f --- /dev/null +++ b/etc/emqtt.args @@ -0,0 +1,28 @@ +## Name of the node +-sname emqtt + +## Cookie for distributed erlang +-setcookie emqttsecret + +#-boot start_sasl + +-s emqtt start + +## Heartbeat management; auto-restarts VM if it dies or becomes unresponsive +## (Disabled by default..use with caution!) +##-heart +-smp auto + +## Enable kernel poll and a few async threads ++K true ++A 64 ++P 10000 + +## Increase number of concurrent ports/sockets +-env ERL_MAX_PORTS 4096 + +-env ERL_MAX_ETS_TABLES 1400 + +## Tweak GC to run more often +-env ERL_FULLSWEEP_AFTER 10 + diff --git a/etc/emqtt.config b/etc/emqtt.config index eca6ec071..4512e5848 100644 --- a/etc/emqtt.config +++ b/etc/emqtt.config @@ -10,16 +10,27 @@ {mnesia, [ {dir, "var/mnesia"} ]}, + {lager, [ + {error_logger_redirect, false}, + {crash_log, "log/emqtt_crash.log"}, + {handlers, [ + {lager_console_backend, info}, + {lager_file_backend, [ + {"log/emqtt_error.log", error, 10485760, "$D0", 5}, + {"log/emqtt_info.log", info, 10485760, "$D0", 5} + ]} + ]} + ]}, {emqtt, [ - {tcp_listeners, [1883]}, - {tcp_listen_options, [binary, - {packet, raw}, - {reuseaddr, true}, - {backlog, 128}, - {nodelay, true}, - {linger, {true, 0}}, - {exit_on_close, false}]} + {listeners, [ + {1883, [ + binary, + {packet, raw}, + {reuseaddr, true}, + {backlog, 128}, + {nodelay, true} + ]} + ]} ]} ]. - diff --git a/include/emqtt.hrl b/include/emqtt.hrl index 971238c02..3b3866ca3 100644 --- a/include/emqtt.hrl +++ b/include/emqtt.hrl @@ -1,6 +1,25 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License +%% at http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and +%% limitations under the License. +%% +%% +%% The Initial Developer of the Original Code is ery.lee@gmail.com +%% Copyright (c) 2012 Ery Lee. All rights reserved. +%% + +%% --------------------------------- +%% banner +%% --------------------------------- +-define(COPYRIGHT, "Copyright (C) 2012 Ery Lee."). --define(COPYRIGHT, "Copyright (C) 2007-2012 VMware, Inc."). -define(LICENSE_MESSAGE, "Licensed under the MPL."). + -define(PROTOCOL_VERSION, "MQTT/3.1"). -define(ERTS_MINIMUM, "5.6.3"). @@ -9,3 +28,66 @@ -record(subscriber, {topic, pid}). +%% --------------------------------- +%% Logging mechanism + +-define(PRINT(Format, Args), + io:format(Format, Args)). + +-define(PRINT_MSG(Msg), + io:format(Msg)). + +-define(DEBUG(Format, Args), + lager:debug(Format, Args)). + +-define(DEBUG_TRACE(Dest, Format, Args), + lager:debug(Dest, Format, Args)). + +-define(DEBUG_MSG(Msg), + lager:debug(Msg)). + +-define(INFO(Format, Args), + lager:info(Format, Args)). + +-define(INFO_TRACE(Dest, Format, Args), + lager:info(Dest, Format, Args)). + +-define(INFO_MSG(Msg), + lager:info(Msg)). + +-define(WARN(Format, Args), + lager:warning(Format, Args)). + +-define(WARN_TRACE(Dest, Format, Args), + lager:warning(Dest, Format, Args)). + +-define(WARN_MSG(Msg), + lager:warning(Msg)). + +-define(WARNING(Format, Args), + lager:warning(Format, Args)). + +-define(WARNING_TRACE(Dest, Format, Args), + lager:warning(Dest, Format, Args)). + +-define(WARNING_MSG(Msg), + lager:warning(Msg)). + +-define(ERROR(Format, Args), + lager:error(Format, Args)). + +-define(ERROR_TRACE(Dest, Format, Args), + lager:error(Dest, Format, Args)). + +-define(ERROR_MSG(Msg), + lager:error(Msg)). + +-define(CRITICAL(Format, Args), + lager:critical(Format, Args)). + +-define(CRITICAL_TRACE(Dest, Format, Args), + lager:critical(Dest, Format, Args)). + +-define(CRITICAL_MSG(Msg), + lager:critical(Msg)). + diff --git a/rebar.config b/rebar.config index c50c864af..85d41db0b 100644 --- a/rebar.config +++ b/rebar.config @@ -1,8 +1,11 @@ +{erl_opts, [debug_info, {parse_transform, lager_transform}]}. + +{erl_opts, [{i, "include"}]}. {lib_dirs,["lib"]}. {deps_dir, ["lib"]}. {deps, [ - {'rabbitlib', ".*", {git, "git://github.com/emqtt/rabbitlib.git", {branch, "master"}}} + {lager, ".*", {git, "git://github.com/basho/lager.git", {branch, "master"}}} ]}. diff --git a/run b/run deleted file mode 100755 index 9976d51bf..000000000 --- a/run +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/bash - -erl -pa ebin -pa lib/rabbitlib/ebin -config etc/emqtt.config -s emqtt_app start diff --git a/src/.emqtt_listener.erl.swp b/src/.emqtt_listener.erl.swp new file mode 100644 index 000000000..d2864b969 Binary files /dev/null and b/src/.emqtt_listener.erl.swp differ diff --git a/src/emqtt.erl b/src/emqtt.erl new file mode 100644 index 000000000..3ae6127ad --- /dev/null +++ b/src/emqtt.erl @@ -0,0 +1,11 @@ +-module(emqtt). + +-export([start/0]). + +start() -> + ok = application:start(sasl), + mnesia:create_schema([node()]), + mnesia:start(), + lager:start(), + ok = application:start(emqtt). + diff --git a/src/emqtt_app.erl b/src/emqtt_app.erl index e29320ba2..8ff827ea3 100644 --- a/src/emqtt_app.erl +++ b/src/emqtt_app.erl @@ -1,32 +1,20 @@ + -module(emqtt_app). --export([start/0]). +-include("emqtt.hrl"). -behaviour(application). %% Application callbacks -export([start/2, stop/1]). --define(APPS, [sasl, mnesia, emqtt]). - -start() -> - [start_app(App) || App <- ?APPS]. - -start_app(mnesia) -> - mnesia:create_schema([node()]), - mnesia:start(); - -start_app(App) -> - application:start(App). - %% =================================================================== %% Application callbacks %% =================================================================== start(_StartType, _StartArgs) -> - {ok, Sup} = emqtt_sup:start_link(), - emqtt_networking:boot(), - {ok, Sup}. + {ok, Listeners} = application:get_env(listeners), + emqtt_sup:start_link(Listeners). stop(_State) -> ok. diff --git a/src/emqtt_client.erl b/src/emqtt_client.erl index 9325b63e6..be7fa4545 100644 --- a/src/emqtt_client.erl +++ b/src/emqtt_client.erl @@ -179,7 +179,7 @@ run_socket(State = #state{ socket = Sock }) -> control_throttle(State = #state{ connection_state = Flow, conserve = Conserve }) -> - case {Flow, Conserve orelse credit_flow:blocked()} of + case {Flow, Conserve} of {running, true} -> State #state{ connection_state = blocked }; {blocked, false} -> run_socket(State #state{ connection_state = running }); diff --git a/src/emqtt_client_sup.erl b/src/emqtt_client_sup.erl index be21d86e9..df33865d0 100644 --- a/src/emqtt_client_sup.erl +++ b/src/emqtt_client_sup.erl @@ -1,6 +1,6 @@ -module(emqtt_client_sup). --export([start_link/0]). +-export([start_link/0, start_client/1]). -behaviour(supervisor2). @@ -14,3 +14,13 @@ init([]) -> [{client, {emqtt_client, start_link, []}, temporary, 5000, worker, [emqtt_client]}]}}. +start_client(Sock) -> + {ok, Client} = supervisor:start_child(?MODULE, []), + ok = gen_tcp:controlling_process(Sock, Client), + emqtt_client:go(Client, Sock), + + %% see comment in rabbit_networking:start_client/2 + gen_event:which_handlers(error_logger), + + Client. + diff --git a/src/emqtt_ctl.erl b/src/emqtt_ctl.erl new file mode 100644 index 000000000..574e40079 --- /dev/null +++ b/src/emqtt_ctl.erl @@ -0,0 +1,28 @@ +-module(emqtt_ctl). + +-include("emqtt.hrl"). + +-compile(export_all). + +status() -> + {InternalStatus, _ProvidedStatus} = init:get_status(), + ?PRINT("Node ~p is ~p.", [node(), InternalStatus]), + case lists:keysearch(wifioss, 1, application:which_applications()) of + false -> + ?PRINT_MSG("wifioss is not running~n"); + {value,_Version} -> + ?PRINT_MSG("wifioss is running~n") + end. + +cluster_info() -> + Nodes = [node()|nodes()], + ?PRINT("cluster nodes: ~p~n", [Nodes]). + +cluster(Node) -> + case net_adm:ping(list_to_atom(Node)) of + pong -> + ?PRINT("cluster with ~p successfully.~n", [Node]); + pang -> + ?PRINT("failed to cluster with ~p~n", [Node]) + end. + diff --git a/src/emqtt_listener.erl b/src/emqtt_listener.erl new file mode 100644 index 000000000..c6c558182 --- /dev/null +++ b/src/emqtt_listener.erl @@ -0,0 +1,52 @@ +-module(emqtt_listener). + +-include("emqtt.hrl"). + +-export([spec/2, listener_started/3, listener_stopped/3]). + +spec({Listener, SockOpts}, Callback) -> + [tcp_listener_spec(emqtt_tcp_listener_sup, Address, SockOpts, + mqtt, "TCP Listener", Callback) || Address <- tcp_listener_addresses(Listener)]. + +tcp_listener_spec(NamePrefix, {IPAddress, Port, Family}, SocketOpts, + Protocol, Label, OnConnect) -> + {emqtt_net:tcp_name(NamePrefix, IPAddress, Port), + {tcp_listener_sup, start_link, + [IPAddress, Port, [Family | SocketOpts], + {?MODULE, listener_started, [Protocol]}, + {?MODULE, listener_stopped, [Protocol]}, + OnConnect, Label]}, + transient, infinity, supervisor, [tcp_listener_sup]}. + +tcp_listener_addresses(Port) when is_integer(Port) -> + tcp_listener_addresses_auto(Port); +tcp_listener_addresses({"auto", Port}) -> + %% Variant to prevent lots of hacking around in bash and batch files + tcp_listener_addresses_auto(Port); +tcp_listener_addresses({Host, Port}) -> + %% auto: determine family IPv4 / IPv6 after converting to IP address + tcp_listener_addresses({Host, Port, auto}); +tcp_listener_addresses({Host, Port, Family0}) + when is_integer(Port) andalso (Port >= 0) andalso (Port =< 65535) -> + [{IPAddress, Port, Family} || + {IPAddress, Family} <- emqtt_net:getaddr(Host, Family0)]; +tcp_listener_addresses({_Host, Port, _Family0}) -> + ?ERROR("invalid port ~p - not 0..65535~n", [Port]), + throw({error, {invalid_port, Port}}). + +tcp_listener_addresses_auto(Port) -> + lists:append([tcp_listener_addresses(Listener) || + Listener <- emqtt_net:port_to_listeners(Port)]). + +%-------------------------------------------- +%TODO: callback +%-------------------------------------------- +listener_started(Protocol, IPAddress, Port) -> + %% We need the ip to distinguish e.g. 0.0.0.0 and 127.0.0.1 + %% We need the host so we can distinguish multiple instances of the above + %% in a cluster. + ?INFO("tcp listener started: ~p ~p:~p", [Protocol, IPAddress, Port]). + +listener_stopped(Protocol, IPAddress, Port) -> + ?INFO("tcp listener stopped: ~p ~p:~p", [Protocol, IPAddress, Port]). + diff --git a/src/emqtt_net.erl b/src/emqtt_net.erl index 68d322747..03146c4d2 100644 --- a/src/emqtt_net.erl +++ b/src/emqtt_net.erl @@ -1,9 +1,137 @@ -module(emqtt_net). +-export([tcp_name/3, tcp_host/1, getaddr/2, port_to_listeners/1]). + -export([tune_buffer_size/1, connection_string/2]). -include_lib("kernel/include/inet.hrl"). +-define(FIRST_TEST_BIND_PORT, 10000). + +%%-------------------------------------------------------------------- + +%% inet_parse:address takes care of ip string, like "0.0.0.0" +%% inet:getaddr returns immediately for ip tuple {0,0,0,0}, +%% and runs 'inet_gethost' port process for dns lookups. +%% On Windows inet:getaddr runs dns resolver for ip string, which may fail. +getaddr(Host, Family) -> + case inet_parse:address(Host) of + {ok, IPAddress} -> [{IPAddress, resolve_family(IPAddress, Family)}]; + {error, _} -> gethostaddr(Host, Family) + end. + +gethostaddr(Host, auto) -> + Lookups = [{Family, inet:getaddr(Host, Family)} || Family <- [inet, inet6]], + case [{IP, Family} || {Family, {ok, IP}} <- Lookups] of + [] -> host_lookup_error(Host, Lookups); + IPs -> IPs + end; + +gethostaddr(Host, Family) -> + case inet:getaddr(Host, Family) of + {ok, IPAddress} -> [{IPAddress, Family}]; + {error, Reason} -> host_lookup_error(Host, Reason) + end. + +host_lookup_error(Host, Reason) -> + error_logger:error_msg("invalid host ~p - ~p~n", [Host, Reason]), + throw({error, {invalid_host, Host, Reason}}). + +resolve_family({_,_,_,_}, auto) -> inet; +resolve_family({_,_,_,_,_,_,_,_}, auto) -> inet6; +resolve_family(IP, auto) -> throw({error, {strange_family, IP}}); +resolve_family(_, F) -> F. + +%%-------------------------------------------------------------------- + +%% There are three kinds of machine (for our purposes). +%% +%% * Those which treat IPv4 addresses as a special kind of IPv6 address +%% ("Single stack") +%% - Linux by default, Windows Vista and later +%% - We also treat any (hypothetical?) IPv6-only machine the same way +%% * Those which consider IPv6 and IPv4 to be completely separate things +%% ("Dual stack") +%% - OpenBSD, Windows XP / 2003, Linux if so configured +%% * Those which do not support IPv6. +%% - Ancient/weird OSes, Linux if so configured +%% +%% How to reconfigure Linux to test this: +%% Single stack (default): +%% echo 0 > /proc/sys/net/ipv6/bindv6only +%% Dual stack: +%% echo 1 > /proc/sys/net/ipv6/bindv6only +%% IPv4 only: +%% add ipv6.disable=1 to GRUB_CMDLINE_LINUX_DEFAULT in /etc/default/grub then +%% sudo update-grub && sudo reboot +%% +%% This matters in (and only in) the case where the sysadmin (or the +%% app descriptor) has only supplied a port and we wish to bind to +%% "all addresses". This means different things depending on whether +%% we're single or dual stack. On single stack binding to "::" +%% implicitly includes all IPv4 addresses, and subsequently attempting +%% to bind to "0.0.0.0" will fail. On dual stack, binding to "::" will +%% only bind to IPv6 addresses, and we need another listener bound to +%% "0.0.0.0" for IPv4. Finally, on IPv4-only systems we of course only +%% want to bind to "0.0.0.0". +%% +%% Unfortunately it seems there is no way to detect single vs dual stack +%% apart from attempting to bind to the port. +port_to_listeners(Port) -> + IPv4 = {"0.0.0.0", Port, inet}, + IPv6 = {"::", Port, inet6}, + case ipv6_status(?FIRST_TEST_BIND_PORT) of + single_stack -> [IPv6]; + ipv6_only -> [IPv6]; + dual_stack -> [IPv6, IPv4]; + ipv4_only -> [IPv4] + end. + +ipv6_status(TestPort) -> + IPv4 = [inet, {ip, {0,0,0,0}}], + IPv6 = [inet6, {ip, {0,0,0,0,0,0,0,0}}], + case gen_tcp:listen(TestPort, IPv6) of + {ok, LSock6} -> + case gen_tcp:listen(TestPort, IPv4) of + {ok, LSock4} -> + %% Dual stack + gen_tcp:close(LSock6), + gen_tcp:close(LSock4), + dual_stack; + %% Checking the error here would only let us + %% distinguish single stack IPv6 / IPv4 vs IPv6 only, + %% which we figure out below anyway. + {error, _} -> + gen_tcp:close(LSock6), + case gen_tcp:listen(TestPort, IPv4) of + %% Single stack + {ok, LSock4} -> gen_tcp:close(LSock4), + single_stack; + %% IPv6-only machine. Welcome to the future. + {error, eafnosupport} -> ipv6_only; %% Linux + {error, eprotonosupport}-> ipv6_only; %% FreeBSD + %% Dual stack machine with something already + %% on IPv4. + {error, _} -> ipv6_status(TestPort + 1) + end + end; + %% IPv4-only machine. Welcome to the 90s. + {error, eafnosupport} -> %% Linux + ipv4_only; + {error, eprotonosupport} -> %% FreeBSD + ipv4_only; + %% Port in use + {error, _} -> + ipv6_status(TestPort + 1) + end. + +tcp_name(Prefix, IPAddress, Port) + when is_atom(Prefix) andalso is_number(Port) -> + list_to_atom( + lists:flatten( + io_lib:format( + "~w_~s:~w", [Prefix, inet_parse:ntoa(IPAddress), Port]))). + tune_buffer_size(Sock) -> case getopts(Sock, [sndbuf, recbuf, buffer]) of {ok, BufSizes} -> BufSz = lists:max([Sz || {_Opt, Sz} <- BufSizes]), @@ -14,16 +142,15 @@ tune_buffer_size(Sock) -> connection_string(Sock, Direction) -> case socket_ends(Sock, Direction) of {ok, {FromAddress, FromPort, ToAddress, ToPort}} -> - {ok, format( - "~s:~p -> ~s:~p", - [maybe_ntoab(FromAddress), FromPort, - maybe_ntoab(ToAddress), ToPort])}; + {ok, lists:flatten( + io_lib:format( + "~s:~p -> ~s:~p", + [maybe_ntoab(FromAddress), FromPort, + maybe_ntoab(ToAddress), ToPort]))}; Error -> Error end. -format(Fmt, Args) -> lists:flatten(io_lib:format(Fmt, Args)). - socket_ends(Sock, Direction) -> {From, To} = sock_funs(Direction), case {From(Sock), To(Sock)} of @@ -84,3 +211,5 @@ hostname() -> {ok, #hostent{h_name = Name}} -> Name; {error, _Reason} -> Hostname end. + + diff --git a/src/emqtt_networking.erl b/src/emqtt_networking.erl deleted file mode 100644 index 2c8998dfe..000000000 --- a/src/emqtt_networking.erl +++ /dev/null @@ -1,257 +0,0 @@ --module(emqtt_networking). - --export([boot/0]). - --export([start_tcp_listener/1, stop_tcp_listener/1, tcp_host/1, ntoab/1]). - -%callback. - --export([tcp_listener_started/3, tcp_listener_stopped/3, start_client/1]). - --include_lib("kernel/include/inet.hrl"). - --define(FIRST_TEST_BIND_PORT, 10000). - -boot() -> - {ok, TcpListeners} = application:get_env(tcp_listeners), - [ok = start_tcp_listener(Listener) || Listener <- TcpListeners]. - -start_tcp_listener(Listener) -> - start_listener(Listener, emqtt, "TCP Listener", - {?MODULE, start_client, []}). - -start_listener(Listener, Protocol, Label, OnConnect) -> - [start_listener0(Address, Protocol, Label, OnConnect) || - Address <- tcp_listener_addresses(Listener)], - ok. - -start_listener0(Address, Protocol, Label, OnConnect) -> - Spec = tcp_listener_spec(emqtt_tcp_listener_sup, Address, tcp_opts(), - Protocol, Label, OnConnect), - case supervisor:start_child(emqtt_sup, Spec) of - {ok, _} -> ok; - {error, {shutdown, _}} -> {IPAddress, Port, _Family} = Address, - exit({could_not_start_tcp_listener, - {ntoa(IPAddress), Port}}) - end. - -stop_tcp_listener(Listener) -> - [stop_tcp_listener0(Address) || - Address <- tcp_listener_addresses(Listener)], - ok. - -stop_tcp_listener0({IPAddress, Port, _Family}) -> - Name = tcp_name(emqtt_tcp_listener_sup, IPAddress, Port), - ok = supervisor:terminate_child(emqtt_sup, Name), - ok = supervisor:delete_child(emqtt_sup, Name). - -tcp_listener_addresses(Port) when is_integer(Port) -> - tcp_listener_addresses_auto(Port); -tcp_listener_addresses({"auto", Port}) -> - %% Variant to prevent lots of hacking around in bash and batch files - tcp_listener_addresses_auto(Port); -tcp_listener_addresses({Host, Port}) -> - %% auto: determine family IPv4 / IPv6 after converting to IP address - tcp_listener_addresses({Host, Port, auto}); -tcp_listener_addresses({Host, Port, Family0}) - when is_integer(Port) andalso (Port >= 0) andalso (Port =< 65535) -> - [{IPAddress, Port, Family} || - {IPAddress, Family} <- getaddr(Host, Family0)]; -tcp_listener_addresses({_Host, Port, _Family0}) -> - error_logger:error_msg("invalid port ~p - not 0..65535~n", [Port]), - throw({error, {invalid_port, Port}}). - -tcp_listener_addresses_auto(Port) -> - lists:append([tcp_listener_addresses(Listener) || - Listener <- port_to_listeners(Port)]). - -tcp_listener_spec(NamePrefix, {IPAddress, Port, Family}, SocketOpts, - Protocol, Label, OnConnect) -> - {tcp_name(NamePrefix, IPAddress, Port), - {tcp_listener_sup, start_link, - [IPAddress, Port, [Family | SocketOpts], - {?MODULE, tcp_listener_started, [Protocol]}, - {?MODULE, tcp_listener_stopped, [Protocol]}, - OnConnect, Label]}, - transient, infinity, supervisor, [tcp_listener_sup]}. - - -tcp_listener_started(Protocol, IPAddress, Port) -> - %% We need the ip to distinguish e.g. 0.0.0.0 and 127.0.0.1 - %% We need the host so we can distinguish multiple instances of the above - %% in a cluster. - error_logger:info_msg("tcp listener started: ~p ~p:~p", [Protocol, IPAddress, Port]). - -tcp_listener_stopped(Protocol, IPAddress, Port) -> - error_logger:info_msg("tcp listener stopped: ~p ~p:~p", [Protocol, IPAddress, Port]). - -start_client(Sock) -> - {ok, Client} = supervisor:start_child(emqtt_client_sup, []), - ok = gen_tcp:controlling_process(Sock, Client), - emqtt_client:go(Client, Sock), - - %% see comment in rabbit_networking:start_client/2 - gen_event:which_handlers(error_logger), - - Client. - -%%-------------------------------------------------------------------- -tcp_host({0,0,0,0}) -> - hostname(); - -tcp_host({0,0,0,0,0,0,0,0}) -> - hostname(); - -tcp_host(IPAddress) -> - case inet:gethostbyaddr(IPAddress) of - {ok, #hostent{h_name = Name}} -> Name; - {error, _Reason} -> ntoa(IPAddress) - end. - -hostname() -> - {ok, Hostname} = inet:gethostname(), - case inet:gethostbyname(Hostname) of - {ok, #hostent{h_name = Name}} -> Name; - {error, _Reason} -> Hostname - end. - -tcp_opts() -> - {ok, Opts} = application:get_env(emqtt, tcp_listen_options), - Opts. - -%% inet_parse:address takes care of ip string, like "0.0.0.0" -%% inet:getaddr returns immediately for ip tuple {0,0,0,0}, -%% and runs 'inet_gethost' port process for dns lookups. -%% On Windows inet:getaddr runs dns resolver for ip string, which may fail. -getaddr(Host, Family) -> - case inet_parse:address(Host) of - {ok, IPAddress} -> [{IPAddress, resolve_family(IPAddress, Family)}]; - {error, _} -> gethostaddr(Host, Family) - end. - -gethostaddr(Host, auto) -> - Lookups = [{Family, inet:getaddr(Host, Family)} || Family <- [inet, inet6]], - case [{IP, Family} || {Family, {ok, IP}} <- Lookups] of - [] -> host_lookup_error(Host, Lookups); - IPs -> IPs - end; - -gethostaddr(Host, Family) -> - case inet:getaddr(Host, Family) of - {ok, IPAddress} -> [{IPAddress, Family}]; - {error, Reason} -> host_lookup_error(Host, Reason) - end. - -host_lookup_error(Host, Reason) -> - error_logger:error_msg("invalid host ~p - ~p~n", [Host, Reason]), - throw({error, {invalid_host, Host, Reason}}). - -resolve_family({_,_,_,_}, auto) -> inet; -resolve_family({_,_,_,_,_,_,_,_}, auto) -> inet6; -resolve_family(IP, auto) -> throw({error, {strange_family, IP}}); -resolve_family(_, F) -> F. - -%%-------------------------------------------------------------------- - -%% There are three kinds of machine (for our purposes). -%% -%% * Those which treat IPv4 addresses as a special kind of IPv6 address -%% ("Single stack") -%% - Linux by default, Windows Vista and later -%% - We also treat any (hypothetical?) IPv6-only machine the same way -%% * Those which consider IPv6 and IPv4 to be completely separate things -%% ("Dual stack") -%% - OpenBSD, Windows XP / 2003, Linux if so configured -%% * Those which do not support IPv6. -%% - Ancient/weird OSes, Linux if so configured -%% -%% How to reconfigure Linux to test this: -%% Single stack (default): -%% echo 0 > /proc/sys/net/ipv6/bindv6only -%% Dual stack: -%% echo 1 > /proc/sys/net/ipv6/bindv6only -%% IPv4 only: -%% add ipv6.disable=1 to GRUB_CMDLINE_LINUX_DEFAULT in /etc/default/grub then -%% sudo update-grub && sudo reboot -%% -%% This matters in (and only in) the case where the sysadmin (or the -%% app descriptor) has only supplied a port and we wish to bind to -%% "all addresses". This means different things depending on whether -%% we're single or dual stack. On single stack binding to "::" -%% implicitly includes all IPv4 addresses, and subsequently attempting -%% to bind to "0.0.0.0" will fail. On dual stack, binding to "::" will -%% only bind to IPv6 addresses, and we need another listener bound to -%% "0.0.0.0" for IPv4. Finally, on IPv4-only systems we of course only -%% want to bind to "0.0.0.0". -%% -%% Unfortunately it seems there is no way to detect single vs dual stack -%% apart from attempting to bind to the port. -port_to_listeners(Port) -> - IPv4 = {"0.0.0.0", Port, inet}, - IPv6 = {"::", Port, inet6}, - case ipv6_status(?FIRST_TEST_BIND_PORT) of - single_stack -> [IPv6]; - ipv6_only -> [IPv6]; - dual_stack -> [IPv6, IPv4]; - ipv4_only -> [IPv4] - end. - -ipv6_status(TestPort) -> - IPv4 = [inet, {ip, {0,0,0,0}}], - IPv6 = [inet6, {ip, {0,0,0,0,0,0,0,0}}], - case gen_tcp:listen(TestPort, IPv6) of - {ok, LSock6} -> - case gen_tcp:listen(TestPort, IPv4) of - {ok, LSock4} -> - %% Dual stack - gen_tcp:close(LSock6), - gen_tcp:close(LSock4), - dual_stack; - %% Checking the error here would only let us - %% distinguish single stack IPv6 / IPv4 vs IPv6 only, - %% which we figure out below anyway. - {error, _} -> - gen_tcp:close(LSock6), - case gen_tcp:listen(TestPort, IPv4) of - %% Single stack - {ok, LSock4} -> gen_tcp:close(LSock4), - single_stack; - %% IPv6-only machine. Welcome to the future. - {error, eafnosupport} -> ipv6_only; %% Linux - {error, eprotonosupport}-> ipv6_only; %% FreeBSD - %% Dual stack machine with something already - %% on IPv4. - {error, _} -> ipv6_status(TestPort + 1) - end - end; - %% IPv4-only machine. Welcome to the 90s. - {error, eafnosupport} -> %% Linux - ipv4_only; - {error, eprotonosupport} -> %% FreeBSD - ipv4_only; - %% Port in use - {error, _} -> - ipv6_status(TestPort + 1) - end. - -ntoa({0,0,0,0,0,16#ffff,AB,CD}) -> - inet_parse:ntoa({AB bsr 8, AB rem 256, CD bsr 8, CD rem 256}); -ntoa(IP) -> - inet_parse:ntoa(IP). - -ntoab(IP) -> - Str = ntoa(IP), - case string:str(Str, ":") of - 0 -> Str; - _ -> "[" ++ Str ++ "]" - end. - -tcp_name(Prefix, IPAddress, Port) - when is_atom(Prefix) andalso is_number(Port) -> - list_to_atom( - format("~w_~s:~w", [Prefix, inet_parse:ntoa(IPAddress), Port])). - -format(Fmt, Args) -> lists:flatten(io_lib:format(Fmt, Args)). - - - diff --git a/src/emqtt_router.erl b/src/emqtt_router.erl index 76eb4c79f..461356621 100644 --- a/src/emqtt_router.erl +++ b/src/emqtt_router.erl @@ -1,6 +1,7 @@ -module(emqtt_router). -include("emqtt.hrl"). + -include("emqtt_frame.hrl"). -export([start_link/0]). @@ -44,8 +45,8 @@ delete(Sub) when is_record(Sub, subscriber) -> gen_server:cast(?MODULE, {delete, Sub}). init([]) -> - Res = ets:new(subscriber, [bag, protected, named_table, {keypos, 2}]), - error_logger:info_msg("emqtt_router is started: ~p~n", [Res]), + ets:new(subscriber, [bag, named_table, {keypos, 2}]), + ?INFO_MSG("emqtt_router is started."), {ok, #state{}}. handle_call({insert, Sub}, _From, State) -> diff --git a/src/emqtt_sup.erl b/src/emqtt_sup.erl index 89d61eab3..4d12fdf35 100644 --- a/src/emqtt_sup.erl +++ b/src/emqtt_sup.erl @@ -5,7 +5,7 @@ -behaviour(supervisor). %% API --export([start_link/0]). +-export([start_link/1]). %% Supervisor callbacks -export([init/1]). @@ -17,17 +17,25 @@ %% API functions %% =================================================================== -start_link() -> - supervisor:start_link({local, ?MODULE}, ?MODULE, []). +start_link(Listeners) -> + supervisor:start_link({local, ?MODULE}, ?MODULE, [Listeners]). %% =================================================================== %% Supervisor callbacks %% =================================================================== -init([]) -> +init([Listeners]) -> {ok, { {one_for_all, 5, 10}, [ ?CHILD(emqtt_topic, worker), ?CHILD(emqtt_router, worker), ?CHILD(emqtt_client_sup, supervisor) - ]} }. + | listener_children(Listeners) ]} + }. + +listener_children(Listeners) -> + lists:append([emqtt_listener:spec(Listener, + {emqtt_client_sup, start_client, []}) || Listener <- Listeners]). + + + diff --git a/src/emqtt_topic.erl b/src/emqtt_topic.erl index 211bf9eba..f287e4d84 100644 --- a/src/emqtt_topic.erl +++ b/src/emqtt_topic.erl @@ -50,7 +50,7 @@ init([]) -> {record_name, topic}, {ram_copies, [node()]}, {attributes, record_info(fields, topic)}]), - error_logger:info_msg("emqtt_topic is started."), + ?INFO_MSG("emqtt_topic is started."), {ok, #state{}}. handle_call({insert, Topic}, _From, State) -> diff --git a/src/file_handle_cache.erl b/src/file_handle_cache.erl new file mode 100644 index 000000000..3260d3698 --- /dev/null +++ b/src/file_handle_cache.erl @@ -0,0 +1,1227 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License +%% at http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and +%% limitations under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developer of the Original Code is VMware, Inc. +%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% + +-module(file_handle_cache). + +%% A File Handle Cache +%% +%% This extends a subset of the functionality of the Erlang file +%% module. In the below, we use "file handle" to specifically refer to +%% file handles, and "file descriptor" to refer to descriptors which +%% are not file handles, e.g. sockets. +%% +%% Some constraints +%% 1) This supports one writer, multiple readers per file. Nothing +%% else. +%% 2) Do not open the same file from different processes. Bad things +%% may happen, especially for writes. +%% 3) Writes are all appends. You cannot write to the middle of a +%% file, although you can truncate and then append if you want. +%% 4) Although there is a write buffer, there is no read buffer. Feel +%% free to use the read_ahead mode, but beware of the interaction +%% between that buffer and the write buffer. +%% +%% Some benefits +%% 1) You do not have to remember to call sync before close +%% 2) Buffering is much more flexible than with the plain file module, +%% and you can control when the buffer gets flushed out. This means +%% that you can rely on reads-after-writes working, without having to +%% call the expensive sync. +%% 3) Unnecessary calls to position and sync get optimised out. +%% 4) You can find out what your 'real' offset is, and what your +%% 'virtual' offset is (i.e. where the hdl really is, and where it +%% would be after the write buffer is written out). +%% +%% There is also a server component which serves to limit the number +%% of open file descriptors. This is a hard limit: the server +%% component will ensure that clients do not have more file +%% descriptors open than it's configured to allow. +%% +%% On open, the client requests permission from the server to open the +%% required number of file handles. The server may ask the client to +%% close other file handles that it has open, or it may queue the +%% request and ask other clients to close file handles they have open +%% in order to satisfy the request. Requests are always satisfied in +%% the order they arrive, even if a latter request (for a small number +%% of file handles) can be satisfied before an earlier request (for a +%% larger number of file handles). On close, the client sends a +%% message to the server. These messages allow the server to keep +%% track of the number of open handles. The client also keeps a +%% gb_tree which is updated on every use of a file handle, mapping the +%% time at which the file handle was last used (timestamp) to the +%% handle. Thus the smallest key in this tree maps to the file handle +%% that has not been used for the longest amount of time. This +%% smallest key is included in the messages to the server. As such, +%% the server keeps track of when the least recently used file handle +%% was used *at the point of the most recent open or close* by each +%% client. +%% +%% Note that this data can go very out of date, by the client using +%% the least recently used handle. +%% +%% When the limit is exceeded (i.e. the number of open file handles is +%% at the limit and there are pending 'open' requests), the server +%% calculates the average age of the last reported least recently used +%% file handle of all the clients. It then tells all the clients to +%% close any handles not used for longer than this average, by +%% invoking the callback the client registered. The client should +%% receive this message and pass it into +%% set_maximum_since_use/1. However, it is highly possible this age +%% will be greater than the ages of all the handles the client knows +%% of because the client has used its file handles in the mean +%% time. Thus at this point the client reports to the server the +%% current timestamp at which its least recently used file handle was +%% last used. The server will check two seconds later that either it +%% is back under the limit, in which case all is well again, or if +%% not, it will calculate a new average age. Its data will be much +%% more recent now, and so it is very likely that when this is +%% communicated to the clients, the clients will close file handles. +%% (In extreme cases, where it's very likely that all clients have +%% used their open handles since they last sent in an update, which +%% would mean that the average will never cause any file handles to +%% be closed, the server can send out an average age of 0, resulting +%% in all available clients closing all their file handles.) +%% +%% Care is taken to ensure that (a) processes which are blocked +%% waiting for file descriptors to become available are not sent +%% requests to close file handles; and (b) given it is known how many +%% file handles a process has open, when the average age is forced to +%% 0, close messages are only sent to enough processes to release the +%% correct number of file handles and the list of processes is +%% randomly shuffled. This ensures we don't cause processes to +%% needlessly close file handles, and ensures that we don't always +%% make such requests of the same processes. +%% +%% The advantage of this scheme is that there is only communication +%% from the client to the server on open, close, and when in the +%% process of trying to reduce file handle usage. There is no +%% communication from the client to the server on normal file handle +%% operations. This scheme forms a feed-back loop - the server does +%% not care which file handles are closed, just that some are, and it +%% checks this repeatedly when over the limit. +%% +%% Handles which are closed as a result of the server are put into a +%% "soft-closed" state in which the handle is closed (data flushed out +%% and sync'd first) but the state is maintained. The handle will be +%% fully reopened again as soon as needed, thus users of this library +%% do not need to worry about their handles being closed by the server +%% - reopening them when necessary is handled transparently. +%% +%% The server also supports obtain, release and transfer. obtain/{0,1} +%% blocks until a file descriptor is available, at which point the +%% requesting process is considered to 'own' more descriptor(s). +%% release/{0,1} is the inverse operation and releases previously obtained +%% descriptor(s). transfer/{1,2} transfers ownership of file descriptor(s) +%% between processes. It is non-blocking. Obtain has a +%% lower limit, set by the ?OBTAIN_LIMIT/1 macro. File handles can use +%% the entire limit, but will be evicted by obtain calls up to the +%% point at which no more obtain calls can be satisfied by the obtains +%% limit. Thus there will always be some capacity available for file +%% handles. Processes that use obtain are never asked to return them, +%% and they are not managed in any way by the server. It is simply a +%% mechanism to ensure that processes that need file descriptors such +%% as sockets can do so in such a way that the overall number of open +%% file descriptors is managed. +%% +%% The callers of register_callback/3, obtain, and the argument of +%% transfer are monitored, reducing the count of handles in use +%% appropriately when the processes terminate. + +-behaviour(gen_server2). + +-export([register_callback/3]). +-export([open/3, close/1, read/2, append/2, needs_sync/1, sync/1, position/2, + truncate/1, current_virtual_offset/1, current_raw_offset/1, flush/1, + copy/3, set_maximum_since_use/1, delete/1, clear/1]). +-export([obtain/0, obtain/1, release/0, release/1, transfer/1, transfer/2, + set_limit/1, get_limit/0, info_keys/0, + info/0, info/1]). +-export([ulimit/0]). + +-export([start_link/0, start_link/2, init/1, handle_call/3, handle_cast/2, + handle_info/2, terminate/2, code_change/3, prioritise_cast/2]). + +-define(SERVER, ?MODULE). +-define(RESERVED_FOR_OTHERS, 100). + +-define(FILE_HANDLES_LIMIT_OTHER, 1024). +-define(FILE_HANDLES_CHECK_INTERVAL, 2000). + +-define(OBTAIN_LIMIT(LIMIT), trunc((LIMIT * 0.9) - 2)). +-define(CLIENT_ETS_TABLE, file_handle_cache_client). +-define(ELDERS_ETS_TABLE, file_handle_cache_elders). + +%%---------------------------------------------------------------------------- + +-record(file, + { reader_count, + has_writer + }). + +-record(handle, + { hdl, + offset, + is_dirty, + write_buffer_size, + write_buffer_size_limit, + write_buffer, + at_eof, + path, + mode, + options, + is_write, + is_read, + last_used_at + }). + +-record(fhc_state, + { elders, + limit, + open_count, + open_pending, + obtain_limit, + obtain_count, + obtain_pending, + clients, + timer_ref, + alarm_set, + alarm_clear + }). + +-record(cstate, + { pid, + callback, + opened, + obtained, + blocked, + pending_closes + }). + +-record(pending, + { kind, + pid, + requested, + from + }). + +%%---------------------------------------------------------------------------- +%% Specs +%%---------------------------------------------------------------------------- + +-ifdef(use_specs). + +-type(ref() :: any()). +-type(ok_or_error() :: 'ok' | {'error', any()}). +-type(val_or_error(T) :: {'ok', T} | {'error', any()}). +-type(position() :: ('bof' | 'eof' | non_neg_integer() | + {('bof' |'eof'), non_neg_integer()} | + {'cur', integer()})). +-type(offset() :: non_neg_integer()). + +-spec(register_callback/3 :: (atom(), atom(), [any()]) -> 'ok'). +-spec(open/3 :: + (file:filename(), [any()], + [{'write_buffer', (non_neg_integer() | 'infinity' | 'unbuffered')}]) + -> val_or_error(ref())). +-spec(close/1 :: (ref()) -> ok_or_error()). +-spec(read/2 :: (ref(), non_neg_integer()) -> + val_or_error([char()] | binary()) | 'eof'). +-spec(append/2 :: (ref(), iodata()) -> ok_or_error()). +-spec(sync/1 :: (ref()) -> ok_or_error()). +-spec(position/2 :: (ref(), position()) -> val_or_error(offset())). +-spec(truncate/1 :: (ref()) -> ok_or_error()). +-spec(current_virtual_offset/1 :: (ref()) -> val_or_error(offset())). +-spec(current_raw_offset/1 :: (ref()) -> val_or_error(offset())). +-spec(flush/1 :: (ref()) -> ok_or_error()). +-spec(copy/3 :: (ref(), ref(), non_neg_integer()) -> + val_or_error(non_neg_integer())). +-spec(delete/1 :: (ref()) -> ok_or_error()). +-spec(clear/1 :: (ref()) -> ok_or_error()). +-spec(set_maximum_since_use/1 :: (non_neg_integer()) -> 'ok'). +-spec(obtain/0 :: () -> 'ok'). +-spec(obtain/1 :: (non_neg_integer()) -> 'ok'). +-spec(release/0 :: () -> 'ok'). +-spec(release/1 :: (non_neg_integer()) -> 'ok'). +-spec(transfer/1 :: (pid()) -> 'ok'). +-spec(transfer/2 :: (pid(), non_neg_integer()) -> 'ok'). +-spec(set_limit/1 :: (non_neg_integer()) -> 'ok'). +-spec(get_limit/0 :: () -> non_neg_integer()). +-spec(info_keys/0 :: () -> rabbit_types:info_keys()). +-spec(info/0 :: () -> rabbit_types:infos()). +-spec(info/1 :: ([atom()]) -> rabbit_types:infos()). +-spec(ulimit/0 :: () -> 'unknown' | non_neg_integer()). + +-endif. + +%%---------------------------------------------------------------------------- +-define(INFO_KEYS, [total_limit, total_used, sockets_limit, sockets_used]). + +%%---------------------------------------------------------------------------- +%% Public API +%%---------------------------------------------------------------------------- + +start_link() -> + start_link(fun alarm_handler:set_alarm/1, fun alarm_handler:clear_alarm/1). + +start_link(AlarmSet, AlarmClear) -> + gen_server2:start_link({local, ?SERVER}, ?MODULE, [AlarmSet, AlarmClear], + [{timeout, infinity}]). + +register_callback(M, F, A) + when is_atom(M) andalso is_atom(F) andalso is_list(A) -> + gen_server2:cast(?SERVER, {register_callback, self(), {M, F, A}}). + +open(Path, Mode, Options) -> + Path1 = filename:absname(Path), + File1 = #file { reader_count = RCount, has_writer = HasWriter } = + case get({Path1, fhc_file}) of + File = #file {} -> File; + undefined -> #file { reader_count = 0, + has_writer = false } + end, + Mode1 = append_to_write(Mode), + IsWriter = is_writer(Mode1), + case IsWriter andalso HasWriter of + true -> {error, writer_exists}; + false -> {ok, Ref} = new_closed_handle(Path1, Mode1, Options), + case get_or_reopen([{Ref, new}]) of + {ok, [_Handle1]} -> + RCount1 = case is_reader(Mode1) of + true -> RCount + 1; + false -> RCount + end, + HasWriter1 = HasWriter orelse IsWriter, + put({Path1, fhc_file}, + File1 #file { reader_count = RCount1, + has_writer = HasWriter1 }), + {ok, Ref}; + Error -> + erase({Ref, fhc_handle}), + Error + end + end. + +close(Ref) -> + case erase({Ref, fhc_handle}) of + undefined -> ok; + Handle -> case hard_close(Handle) of + ok -> ok; + {Error, Handle1} -> put_handle(Ref, Handle1), + Error + end + end. + +read(Ref, Count) -> + with_flushed_handles( + [Ref], + fun ([#handle { is_read = false }]) -> + {error, not_open_for_reading}; + ([Handle = #handle { hdl = Hdl, offset = Offset }]) -> + case prim_file:read(Hdl, Count) of + {ok, Data} = Obj -> Offset1 = Offset + iolist_size(Data), + {Obj, + [Handle #handle { offset = Offset1 }]}; + eof -> {eof, [Handle #handle { at_eof = true }]}; + Error -> {Error, [Handle]} + end + end). + +append(Ref, Data) -> + with_handles( + [Ref], + fun ([#handle { is_write = false }]) -> + {error, not_open_for_writing}; + ([Handle]) -> + case maybe_seek(eof, Handle) of + {{ok, _Offset}, #handle { hdl = Hdl, offset = Offset, + write_buffer_size_limit = 0, + at_eof = true } = Handle1} -> + Offset1 = Offset + iolist_size(Data), + {prim_file:write(Hdl, Data), + [Handle1 #handle { is_dirty = true, offset = Offset1 }]}; + {{ok, _Offset}, #handle { write_buffer = WriteBuffer, + write_buffer_size = Size, + write_buffer_size_limit = Limit, + at_eof = true } = Handle1} -> + WriteBuffer1 = [Data | WriteBuffer], + Size1 = Size + iolist_size(Data), + Handle2 = Handle1 #handle { write_buffer = WriteBuffer1, + write_buffer_size = Size1 }, + case Limit =/= infinity andalso Size1 > Limit of + true -> {Result, Handle3} = write_buffer(Handle2), + {Result, [Handle3]}; + false -> {ok, [Handle2]} + end; + {{error, _} = Error, Handle1} -> + {Error, [Handle1]} + end + end). + +sync(Ref) -> + with_flushed_handles( + [Ref], + fun ([#handle { is_dirty = false, write_buffer = [] }]) -> + ok; + ([Handle = #handle { hdl = Hdl, + is_dirty = true, write_buffer = [] }]) -> + case prim_file:sync(Hdl) of + ok -> {ok, [Handle #handle { is_dirty = false }]}; + Error -> {Error, [Handle]} + end + end). + +needs_sync(Ref) -> + %% This must *not* use with_handles/2; see bug 25052 + case get({Ref, fhc_handle}) of + #handle { is_dirty = false, write_buffer = [] } -> false; + #handle {} -> true + end. + +position(Ref, NewOffset) -> + with_flushed_handles( + [Ref], + fun ([Handle]) -> {Result, Handle1} = maybe_seek(NewOffset, Handle), + {Result, [Handle1]} + end). + +truncate(Ref) -> + with_flushed_handles( + [Ref], + fun ([Handle1 = #handle { hdl = Hdl }]) -> + case prim_file:truncate(Hdl) of + ok -> {ok, [Handle1 #handle { at_eof = true }]}; + Error -> {Error, [Handle1]} + end + end). + +current_virtual_offset(Ref) -> + with_handles([Ref], fun ([#handle { at_eof = true, is_write = true, + offset = Offset, + write_buffer_size = Size }]) -> + {ok, Offset + Size}; + ([#handle { offset = Offset }]) -> + {ok, Offset} + end). + +current_raw_offset(Ref) -> + with_handles([Ref], fun ([Handle]) -> {ok, Handle #handle.offset} end). + +flush(Ref) -> + with_flushed_handles([Ref], fun ([Handle]) -> {ok, [Handle]} end). + +copy(Src, Dest, Count) -> + with_flushed_handles( + [Src, Dest], + fun ([SHandle = #handle { is_read = true, hdl = SHdl, offset = SOffset }, + DHandle = #handle { is_write = true, hdl = DHdl, offset = DOffset }] + ) -> + case prim_file:copy(SHdl, DHdl, Count) of + {ok, Count1} = Result1 -> + {Result1, + [SHandle #handle { offset = SOffset + Count1 }, + DHandle #handle { offset = DOffset + Count1, + is_dirty = true }]}; + Error -> + {Error, [SHandle, DHandle]} + end; + (_Handles) -> + {error, incorrect_handle_modes} + end). + +delete(Ref) -> + case erase({Ref, fhc_handle}) of + undefined -> + ok; + Handle = #handle { path = Path } -> + case hard_close(Handle #handle { is_dirty = false, + write_buffer = [] }) of + ok -> prim_file:delete(Path); + {Error, Handle1} -> put_handle(Ref, Handle1), + Error + end + end. + +clear(Ref) -> + with_handles( + [Ref], + fun ([#handle { at_eof = true, write_buffer_size = 0, offset = 0 }]) -> + ok; + ([Handle]) -> + case maybe_seek(bof, Handle #handle { write_buffer = [], + write_buffer_size = 0 }) of + {{ok, 0}, Handle1 = #handle { hdl = Hdl }} -> + case prim_file:truncate(Hdl) of + ok -> {ok, [Handle1 #handle { at_eof = true }]}; + Error -> {Error, [Handle1]} + end; + {{error, _} = Error, Handle1} -> + {Error, [Handle1]} + end + end). + +set_maximum_since_use(MaximumAge) -> + Now = now(), + case lists:foldl( + fun ({{Ref, fhc_handle}, + Handle = #handle { hdl = Hdl, last_used_at = Then }}, Rep) -> + case Hdl =/= closed andalso + timer:now_diff(Now, Then) >= MaximumAge of + true -> soft_close(Ref, Handle) orelse Rep; + false -> Rep + end; + (_KeyValuePair, Rep) -> + Rep + end, false, get()) of + false -> age_tree_change(), ok; + true -> ok + end. + +obtain() -> obtain(1). +release() -> release(1). +transfer(Pid) -> transfer(Pid, 1). + +obtain(Count) when Count > 0 -> + %% If the FHC isn't running, obtains succeed immediately. + case whereis(?SERVER) of + undefined -> ok; + _ -> gen_server2:call(?SERVER, {obtain, Count, self()}, infinity) + end. + +release(Count) when Count > 0 -> + gen_server2:cast(?SERVER, {release, Count, self()}). + +transfer(Pid, Count) when Count > 0 -> + gen_server2:cast(?SERVER, {transfer, Count, self(), Pid}). + +set_limit(Limit) -> + gen_server2:call(?SERVER, {set_limit, Limit}, infinity). + +get_limit() -> + gen_server2:call(?SERVER, get_limit, infinity). + +info_keys() -> ?INFO_KEYS. + +info() -> info(?INFO_KEYS). +info(Items) -> gen_server2:call(?SERVER, {info, Items}, infinity). + +%%---------------------------------------------------------------------------- +%% Internal functions +%%---------------------------------------------------------------------------- + +is_reader(Mode) -> lists:member(read, Mode). + +is_writer(Mode) -> lists:member(write, Mode). + +append_to_write(Mode) -> + case lists:member(append, Mode) of + true -> [write | Mode -- [append, write]]; + false -> Mode + end. + +with_handles(Refs, Fun) -> + case get_or_reopen([{Ref, reopen} || Ref <- Refs]) of + {ok, Handles} -> + case Fun(Handles) of + {Result, Handles1} when is_list(Handles1) -> + lists:zipwith(fun put_handle/2, Refs, Handles1), + Result; + Result -> + Result + end; + Error -> + Error + end. + +with_flushed_handles(Refs, Fun) -> + with_handles( + Refs, + fun (Handles) -> + case lists:foldl( + fun (Handle, {ok, HandlesAcc}) -> + {Res, Handle1} = write_buffer(Handle), + {Res, [Handle1 | HandlesAcc]}; + (Handle, {Error, HandlesAcc}) -> + {Error, [Handle | HandlesAcc]} + end, {ok, []}, Handles) of + {ok, Handles1} -> + Fun(lists:reverse(Handles1)); + {Error, Handles1} -> + {Error, lists:reverse(Handles1)} + end + end). + +get_or_reopen(RefNewOrReopens) -> + case partition_handles(RefNewOrReopens) of + {OpenHdls, []} -> + {ok, [Handle || {_Ref, Handle} <- OpenHdls]}; + {OpenHdls, ClosedHdls} -> + Oldest = oldest(get_age_tree(), fun () -> now() end), + case gen_server2:call(?SERVER, {open, self(), length(ClosedHdls), + Oldest}, infinity) of + ok -> + case reopen(ClosedHdls) of + {ok, RefHdls} -> sort_handles(RefNewOrReopens, + OpenHdls, RefHdls, []); + Error -> Error + end; + close -> + [soft_close(Ref, Handle) || + {{Ref, fhc_handle}, Handle = #handle { hdl = Hdl }} <- + get(), + Hdl =/= closed], + get_or_reopen(RefNewOrReopens) + end + end. + +reopen(ClosedHdls) -> reopen(ClosedHdls, get_age_tree(), []). + +reopen([], Tree, RefHdls) -> + put_age_tree(Tree), + {ok, lists:reverse(RefHdls)}; +reopen([{Ref, NewOrReopen, Handle = #handle { hdl = closed, + path = Path, + mode = Mode, + offset = Offset, + last_used_at = undefined }} | + RefNewOrReopenHdls] = ToOpen, Tree, RefHdls) -> + case prim_file:open(Path, case NewOrReopen of + new -> Mode; + reopen -> [read | Mode] + end) of + {ok, Hdl} -> + Now = now(), + {{ok, _Offset}, Handle1} = + maybe_seek(Offset, Handle #handle { hdl = Hdl, + offset = 0, + last_used_at = Now }), + put({Ref, fhc_handle}, Handle1), + reopen(RefNewOrReopenHdls, gb_trees:insert(Now, Ref, Tree), + [{Ref, Handle1} | RefHdls]); + Error -> + %% NB: none of the handles in ToOpen are in the age tree + Oldest = oldest(Tree, fun () -> undefined end), + [gen_server2:cast(?SERVER, {close, self(), Oldest}) || _ <- ToOpen], + put_age_tree(Tree), + Error + end. + +partition_handles(RefNewOrReopens) -> + lists:foldr( + fun ({Ref, NewOrReopen}, {Open, Closed}) -> + case get({Ref, fhc_handle}) of + #handle { hdl = closed } = Handle -> + {Open, [{Ref, NewOrReopen, Handle} | Closed]}; + #handle {} = Handle -> + {[{Ref, Handle} | Open], Closed} + end + end, {[], []}, RefNewOrReopens). + +sort_handles([], [], [], Acc) -> + {ok, lists:reverse(Acc)}; +sort_handles([{Ref, _} | RefHdls], [{Ref, Handle} | RefHdlsA], RefHdlsB, Acc) -> + sort_handles(RefHdls, RefHdlsA, RefHdlsB, [Handle | Acc]); +sort_handles([{Ref, _} | RefHdls], RefHdlsA, [{Ref, Handle} | RefHdlsB], Acc) -> + sort_handles(RefHdls, RefHdlsA, RefHdlsB, [Handle | Acc]). + +put_handle(Ref, Handle = #handle { last_used_at = Then }) -> + Now = now(), + age_tree_update(Then, Now, Ref), + put({Ref, fhc_handle}, Handle #handle { last_used_at = Now }). + +with_age_tree(Fun) -> put_age_tree(Fun(get_age_tree())). + +get_age_tree() -> + case get(fhc_age_tree) of + undefined -> gb_trees:empty(); + AgeTree -> AgeTree + end. + +put_age_tree(Tree) -> put(fhc_age_tree, Tree). + +age_tree_update(Then, Now, Ref) -> + with_age_tree( + fun (Tree) -> + gb_trees:insert(Now, Ref, gb_trees:delete_any(Then, Tree)) + end). + +age_tree_delete(Then) -> + with_age_tree( + fun (Tree) -> + Tree1 = gb_trees:delete_any(Then, Tree), + Oldest = oldest(Tree1, fun () -> undefined end), + gen_server2:cast(?SERVER, {close, self(), Oldest}), + Tree1 + end). + +age_tree_change() -> + with_age_tree( + fun (Tree) -> + case gb_trees:is_empty(Tree) of + true -> Tree; + false -> {Oldest, _Ref} = gb_trees:smallest(Tree), + gen_server2:cast(?SERVER, {update, self(), Oldest}) + end, + Tree + end). + +oldest(Tree, DefaultFun) -> + case gb_trees:is_empty(Tree) of + true -> DefaultFun(); + false -> {Oldest, _Ref} = gb_trees:smallest(Tree), + Oldest + end. + +new_closed_handle(Path, Mode, Options) -> + WriteBufferSize = + case proplists:get_value(write_buffer, Options, unbuffered) of + unbuffered -> 0; + infinity -> infinity; + N when is_integer(N) -> N + end, + Ref = make_ref(), + put({Ref, fhc_handle}, #handle { hdl = closed, + offset = 0, + is_dirty = false, + write_buffer_size = 0, + write_buffer_size_limit = WriteBufferSize, + write_buffer = [], + at_eof = false, + path = Path, + mode = Mode, + options = Options, + is_write = is_writer(Mode), + is_read = is_reader(Mode), + last_used_at = undefined }), + {ok, Ref}. + +soft_close(Ref, Handle) -> + {Res, Handle1} = soft_close(Handle), + case Res of + ok -> put({Ref, fhc_handle}, Handle1), + true; + _ -> put_handle(Ref, Handle1), + false + end. + +soft_close(Handle = #handle { hdl = closed }) -> + {ok, Handle}; +soft_close(Handle) -> + case write_buffer(Handle) of + {ok, #handle { hdl = Hdl, + is_dirty = IsDirty, + last_used_at = Then } = Handle1 } -> + ok = case IsDirty of + true -> prim_file:sync(Hdl); + false -> ok + end, + ok = prim_file:close(Hdl), + age_tree_delete(Then), + {ok, Handle1 #handle { hdl = closed, + is_dirty = false, + last_used_at = undefined }}; + {_Error, _Handle} = Result -> + Result + end. + +hard_close(Handle) -> + case soft_close(Handle) of + {ok, #handle { path = Path, + is_read = IsReader, is_write = IsWriter }} -> + #file { reader_count = RCount, has_writer = HasWriter } = File = + get({Path, fhc_file}), + RCount1 = case IsReader of + true -> RCount - 1; + false -> RCount + end, + HasWriter1 = HasWriter andalso not IsWriter, + case RCount1 =:= 0 andalso not HasWriter1 of + true -> erase({Path, fhc_file}); + false -> put({Path, fhc_file}, + File #file { reader_count = RCount1, + has_writer = HasWriter1 }) + end, + ok; + {_Error, _Handle} = Result -> + Result + end. + +maybe_seek(NewOffset, Handle = #handle { hdl = Hdl, offset = Offset, + at_eof = AtEoF }) -> + {AtEoF1, NeedsSeek} = needs_seek(AtEoF, Offset, NewOffset), + case (case NeedsSeek of + true -> prim_file:position(Hdl, NewOffset); + false -> {ok, Offset} + end) of + {ok, Offset1} = Result -> + {Result, Handle #handle { offset = Offset1, at_eof = AtEoF1 }}; + {error, _} = Error -> + {Error, Handle} + end. + +needs_seek( AtEoF, _CurOffset, cur ) -> {AtEoF, false}; +needs_seek( AtEoF, _CurOffset, {cur, 0}) -> {AtEoF, false}; +needs_seek( true, _CurOffset, eof ) -> {true , false}; +needs_seek( true, _CurOffset, {eof, 0}) -> {true , false}; +needs_seek( false, _CurOffset, eof ) -> {true , true }; +needs_seek( false, _CurOffset, {eof, 0}) -> {true , true }; +needs_seek( AtEoF, 0, bof ) -> {AtEoF, false}; +needs_seek( AtEoF, 0, {bof, 0}) -> {AtEoF, false}; +needs_seek( AtEoF, CurOffset, CurOffset) -> {AtEoF, false}; +needs_seek( true, CurOffset, {bof, DesiredOffset}) + when DesiredOffset >= CurOffset -> + {true, true}; +needs_seek( true, _CurOffset, {cur, DesiredOffset}) + when DesiredOffset > 0 -> + {true, true}; +needs_seek( true, CurOffset, DesiredOffset) %% same as {bof, DO} + when is_integer(DesiredOffset) andalso DesiredOffset >= CurOffset -> + {true, true}; +%% because we can't really track size, we could well end up at EoF and not know +needs_seek(_AtEoF, _CurOffset, _DesiredOffset) -> + {false, true}. + +write_buffer(Handle = #handle { write_buffer = [] }) -> + {ok, Handle}; +write_buffer(Handle = #handle { hdl = Hdl, offset = Offset, + write_buffer = WriteBuffer, + write_buffer_size = DataSize, + at_eof = true }) -> + case prim_file:write(Hdl, lists:reverse(WriteBuffer)) of + ok -> + Offset1 = Offset + DataSize, + {ok, Handle #handle { offset = Offset1, is_dirty = true, + write_buffer = [], write_buffer_size = 0 }}; + {error, _} = Error -> + {Error, Handle} + end. + +infos(Items, State) -> [{Item, i(Item, State)} || Item <- Items]. + +i(total_limit, #fhc_state{limit = Limit}) -> Limit; +i(total_used, #fhc_state{open_count = C1, obtain_count = C2}) -> C1 + C2; +i(sockets_limit, #fhc_state{obtain_limit = Limit}) -> Limit; +i(sockets_used, #fhc_state{obtain_count = Count}) -> Count; +i(Item, _) -> throw({bad_argument, Item}). + +%%---------------------------------------------------------------------------- +%% gen_server2 callbacks +%%---------------------------------------------------------------------------- + +init([AlarmSet, AlarmClear]) -> + Limit = case application:get_env(file_handles_high_watermark) of + {ok, Watermark} when (is_integer(Watermark) andalso + Watermark > 0) -> + Watermark; + _ -> + case ulimit() of + unknown -> ?FILE_HANDLES_LIMIT_OTHER; + Lim -> lists:max([2, Lim - ?RESERVED_FOR_OTHERS]) + end + end, + ObtainLimit = obtain_limit(Limit), + error_logger:info_msg("Limiting to approx ~p file handles (~p sockets)~n", + [Limit, ObtainLimit]), + Clients = ets:new(?CLIENT_ETS_TABLE, [set, private, {keypos, #cstate.pid}]), + Elders = ets:new(?ELDERS_ETS_TABLE, [set, private]), + {ok, #fhc_state { elders = Elders, + limit = Limit, + open_count = 0, + open_pending = pending_new(), + obtain_limit = ObtainLimit, + obtain_count = 0, + obtain_pending = pending_new(), + clients = Clients, + timer_ref = undefined, + alarm_set = AlarmSet, + alarm_clear = AlarmClear }}. + +prioritise_cast(Msg, _State) -> + case Msg of + {release, _, _} -> 5; + _ -> 0 + end. + +handle_call({open, Pid, Requested, EldestUnusedSince}, From, + State = #fhc_state { open_count = Count, + open_pending = Pending, + elders = Elders, + clients = Clients }) + when EldestUnusedSince =/= undefined -> + true = ets:insert(Elders, {Pid, EldestUnusedSince}), + Item = #pending { kind = open, + pid = Pid, + requested = Requested, + from = From }, + ok = track_client(Pid, Clients), + case needs_reduce(State #fhc_state { open_count = Count + Requested }) of + true -> case ets:lookup(Clients, Pid) of + [#cstate { opened = 0 }] -> + true = ets:update_element( + Clients, Pid, {#cstate.blocked, true}), + {noreply, + reduce(State #fhc_state { + open_pending = pending_in(Item, Pending) })}; + [#cstate { opened = Opened }] -> + true = ets:update_element( + Clients, Pid, + {#cstate.pending_closes, Opened}), + {reply, close, State} + end; + false -> {noreply, run_pending_item(Item, State)} + end; + +handle_call({obtain, N, Pid}, From, State = #fhc_state { + obtain_count = Count, + obtain_pending = Pending, + clients = Clients }) -> + ok = track_client(Pid, Clients), + Item = #pending { kind = obtain, pid = Pid, requested = N, from = From }, + Enqueue = fun () -> + true = ets:update_element(Clients, Pid, + {#cstate.blocked, true}), + State #fhc_state { + obtain_pending = pending_in(Item, Pending) } + end, + {noreply, + case obtain_limit_reached(State) of + true -> Enqueue(); + false -> case needs_reduce(State #fhc_state { + obtain_count = Count + N }) of + true -> reduce(Enqueue()); + false -> adjust_alarm( + State, run_pending_item(Item, State)) + end + end}; + +handle_call({set_limit, Limit}, _From, State) -> + {reply, ok, adjust_alarm( + State, maybe_reduce( + process_pending( + State #fhc_state { + limit = Limit, + obtain_limit = obtain_limit(Limit) })))}; + +handle_call(get_limit, _From, State = #fhc_state { limit = Limit }) -> + {reply, Limit, State}; + +handle_call({info, Items}, _From, State) -> + {reply, infos(Items, State), State}. + +handle_cast({register_callback, Pid, MFA}, + State = #fhc_state { clients = Clients }) -> + ok = track_client(Pid, Clients), + true = ets:update_element(Clients, Pid, {#cstate.callback, MFA}), + {noreply, State}; + +handle_cast({update, Pid, EldestUnusedSince}, + State = #fhc_state { elders = Elders }) + when EldestUnusedSince =/= undefined -> + true = ets:insert(Elders, {Pid, EldestUnusedSince}), + %% don't call maybe_reduce from here otherwise we can create a + %% storm of messages + {noreply, State}; + +handle_cast({release, N, Pid}, State) -> + {noreply, adjust_alarm(State, process_pending( + update_counts(obtain, Pid, -N, State)))}; + +handle_cast({close, Pid, EldestUnusedSince}, + State = #fhc_state { elders = Elders, clients = Clients }) -> + true = case EldestUnusedSince of + undefined -> ets:delete(Elders, Pid); + _ -> ets:insert(Elders, {Pid, EldestUnusedSince}) + end, + ets:update_counter(Clients, Pid, {#cstate.pending_closes, -1, 0, 0}), + {noreply, adjust_alarm(State, process_pending( + update_counts(open, Pid, -1, State)))}; + +handle_cast({transfer, N, FromPid, ToPid}, State) -> + ok = track_client(ToPid, State#fhc_state.clients), + {noreply, process_pending( + update_counts(obtain, ToPid, +N, + update_counts(obtain, FromPid, -N, State)))}. + +handle_info(check_counts, State) -> + {noreply, maybe_reduce(State #fhc_state { timer_ref = undefined })}; + +handle_info({'DOWN', _MRef, process, Pid, _Reason}, + State = #fhc_state { elders = Elders, + open_count = OpenCount, + open_pending = OpenPending, + obtain_count = ObtainCount, + obtain_pending = ObtainPending, + clients = Clients }) -> + [#cstate { opened = Opened, obtained = Obtained }] = + ets:lookup(Clients, Pid), + true = ets:delete(Clients, Pid), + true = ets:delete(Elders, Pid), + FilterFun = fun (#pending { pid = Pid1 }) -> Pid1 =/= Pid end, + {noreply, adjust_alarm( + State, + process_pending( + State #fhc_state { + open_count = OpenCount - Opened, + open_pending = filter_pending(FilterFun, OpenPending), + obtain_count = ObtainCount - Obtained, + obtain_pending = filter_pending(FilterFun, ObtainPending) }))}. + +terminate(_Reason, State = #fhc_state { clients = Clients, + elders = Elders }) -> + ets:delete(Clients), + ets:delete(Elders), + State. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +%%---------------------------------------------------------------------------- +%% pending queue abstraction helpers +%%---------------------------------------------------------------------------- + +queue_fold(Fun, Init, Q) -> + case queue:out(Q) of + {empty, _Q} -> Init; + {{value, V}, Q1} -> queue_fold(Fun, Fun(V, Init), Q1) + end. + +filter_pending(Fun, {Count, Queue}) -> + {Delta, Queue1} = + queue_fold( + fun (Item = #pending { requested = Requested }, {DeltaN, QueueN}) -> + case Fun(Item) of + true -> {DeltaN, queue:in(Item, QueueN)}; + false -> {DeltaN - Requested, QueueN} + end + end, {0, queue:new()}, Queue), + {Count + Delta, Queue1}. + +pending_new() -> + {0, queue:new()}. + +pending_in(Item = #pending { requested = Requested }, {Count, Queue}) -> + {Count + Requested, queue:in(Item, Queue)}. + +pending_out({0, _Queue} = Pending) -> + {empty, Pending}; +pending_out({N, Queue}) -> + {{value, #pending { requested = Requested }} = Result, Queue1} = + queue:out(Queue), + {Result, {N - Requested, Queue1}}. + +pending_count({Count, _Queue}) -> + Count. + +pending_is_empty({0, _Queue}) -> + true; +pending_is_empty({_N, _Queue}) -> + false. + +%%---------------------------------------------------------------------------- +%% server helpers +%%---------------------------------------------------------------------------- + +obtain_limit(infinity) -> infinity; +obtain_limit(Limit) -> case ?OBTAIN_LIMIT(Limit) of + OLimit when OLimit < 0 -> 0; + OLimit -> OLimit + end. + +obtain_limit_reached(#fhc_state { obtain_limit = Limit, + obtain_count = Count}) -> + Limit =/= infinity andalso Count >= Limit. + +adjust_alarm(OldState = #fhc_state { alarm_set = AlarmSet, + alarm_clear = AlarmClear }, NewState) -> + case {obtain_limit_reached(OldState), obtain_limit_reached(NewState)} of + {false, true} -> AlarmSet({file_descriptor_limit, []}); + {true, false} -> AlarmClear(file_descriptor_limit); + _ -> ok + end, + NewState. + +process_pending(State = #fhc_state { limit = infinity }) -> + State; +process_pending(State) -> + process_open(process_obtain(State)). + +process_open(State = #fhc_state { limit = Limit, + open_pending = Pending, + open_count = OpenCount, + obtain_count = ObtainCount }) -> + {Pending1, State1} = + process_pending(Pending, Limit - (ObtainCount + OpenCount), State), + State1 #fhc_state { open_pending = Pending1 }. + +process_obtain(State = #fhc_state { limit = Limit, + obtain_pending = Pending, + obtain_limit = ObtainLimit, + obtain_count = ObtainCount, + open_count = OpenCount }) -> + Quota = lists:min([ObtainLimit - ObtainCount, + Limit - (ObtainCount + OpenCount)]), + {Pending1, State1} = process_pending(Pending, Quota, State), + State1 #fhc_state { obtain_pending = Pending1 }. + +process_pending(Pending, Quota, State) when Quota =< 0 -> + {Pending, State}; +process_pending(Pending, Quota, State) -> + case pending_out(Pending) of + {empty, _Pending} -> + {Pending, State}; + {{value, #pending { requested = Requested }}, _Pending1} + when Requested > Quota -> + {Pending, State}; + {{value, #pending { requested = Requested } = Item}, Pending1} -> + process_pending(Pending1, Quota - Requested, + run_pending_item(Item, State)) + end. + +run_pending_item(#pending { kind = Kind, + pid = Pid, + requested = Requested, + from = From }, + State = #fhc_state { clients = Clients }) -> + gen_server2:reply(From, ok), + true = ets:update_element(Clients, Pid, {#cstate.blocked, false}), + update_counts(Kind, Pid, Requested, State). + +update_counts(Kind, Pid, Delta, + State = #fhc_state { open_count = OpenCount, + obtain_count = ObtainCount, + clients = Clients }) -> + {OpenDelta, ObtainDelta} = update_counts1(Kind, Pid, Delta, Clients), + State #fhc_state { open_count = OpenCount + OpenDelta, + obtain_count = ObtainCount + ObtainDelta }. + +update_counts1(open, Pid, Delta, Clients) -> + ets:update_counter(Clients, Pid, {#cstate.opened, Delta}), + {Delta, 0}; +update_counts1(obtain, Pid, Delta, Clients) -> + ets:update_counter(Clients, Pid, {#cstate.obtained, Delta}), + {0, Delta}. + +maybe_reduce(State) -> + case needs_reduce(State) of + true -> reduce(State); + false -> State + end. + +needs_reduce(#fhc_state { limit = Limit, + open_count = OpenCount, + open_pending = OpenPending, + obtain_count = ObtainCount, + obtain_limit = ObtainLimit, + obtain_pending = ObtainPending }) -> + Limit =/= infinity + andalso ((OpenCount + ObtainCount > Limit) + orelse (not pending_is_empty(OpenPending)) + orelse (ObtainCount < ObtainLimit + andalso not pending_is_empty(ObtainPending))). + +reduce(State = #fhc_state { open_pending = OpenPending, + obtain_pending = ObtainPending, + elders = Elders, + clients = Clients, + timer_ref = TRef }) -> + Now = now(), + {CStates, Sum, ClientCount} = + ets:foldl(fun ({Pid, Eldest}, {CStatesAcc, SumAcc, CountAcc} = Accs) -> + [#cstate { pending_closes = PendingCloses, + opened = Opened, + blocked = Blocked } = CState] = + ets:lookup(Clients, Pid), + case Blocked orelse PendingCloses =:= Opened of + true -> Accs; + false -> {[CState | CStatesAcc], + SumAcc + timer:now_diff(Now, Eldest), + CountAcc + 1} + end + end, {[], 0, 0}, Elders), + case CStates of + [] -> ok; + _ -> case (Sum / ClientCount) - + (1000 * ?FILE_HANDLES_CHECK_INTERVAL) of + AverageAge when AverageAge > 0 -> + notify_age(CStates, AverageAge); + _ -> + notify_age0(Clients, CStates, + pending_count(OpenPending) + + pending_count(ObtainPending)) + end + end, + case TRef of + undefined -> TRef1 = erlang:send_after( + ?FILE_HANDLES_CHECK_INTERVAL, ?SERVER, + check_counts), + State #fhc_state { timer_ref = TRef1 }; + _ -> State + end. + +notify_age(CStates, AverageAge) -> + lists:foreach( + fun (#cstate { callback = undefined }) -> ok; + (#cstate { callback = {M, F, A} }) -> apply(M, F, A ++ [AverageAge]) + end, CStates). + +notify_age0(Clients, CStates, Required) -> + case [CState || CState <- CStates, CState#cstate.callback =/= undefined] of + [] -> ok; + Notifications -> S = random:uniform(length(Notifications)), + {L1, L2} = lists:split(S, Notifications), + notify(Clients, Required, L2 ++ L1) + end. + +notify(_Clients, _Required, []) -> + ok; +notify(_Clients, Required, _Notifications) when Required =< 0 -> + ok; +notify(Clients, Required, [#cstate{ pid = Pid, + callback = {M, F, A}, + opened = Opened } | Notifications]) -> + apply(M, F, A ++ [0]), + ets:update_element(Clients, Pid, {#cstate.pending_closes, Opened}), + notify(Clients, Required - Opened, Notifications). + +track_client(Pid, Clients) -> + case ets:insert_new(Clients, #cstate { pid = Pid, + callback = undefined, + opened = 0, + obtained = 0, + blocked = false, + pending_closes = 0 }) of + true -> _MRef = erlang:monitor(process, Pid), + ok; + false -> ok + end. + + +%% To increase the number of file descriptors: on Windows set ERL_MAX_PORTS +%% environment variable, on Linux set `ulimit -n`. +ulimit() -> + case proplists:get_value(max_fds, erlang:system_info(check_io)) of + MaxFds when is_integer(MaxFds) andalso MaxFds > 1 -> + case os:type() of + {win32, _OsName} -> + %% On Windows max_fds is twice the number of open files: + %% https://github.com/yrashk/erlang/blob/e1282325ed75e52a98d5/erts/emulator/sys/win32/sys.c#L2459-2466 + MaxFds div 2; + _Any -> + %% For other operating systems trust Erlang. + MaxFds + end; + _ -> + unknown + end. diff --git a/src/gen_server2.erl b/src/gen_server2.erl new file mode 100644 index 000000000..78bbbe062 --- /dev/null +++ b/src/gen_server2.erl @@ -0,0 +1,1234 @@ +%% This file is a copy of gen_server.erl from the R13B-1 Erlang/OTP +%% distribution, with the following modifications: +%% +%% 1) the module name is gen_server2 +%% +%% 2) more efficient handling of selective receives in callbacks +%% gen_server2 processes drain their message queue into an internal +%% buffer before invoking any callback module functions. Messages are +%% dequeued from the buffer for processing. Thus the effective message +%% queue of a gen_server2 process is the concatenation of the internal +%% buffer and the real message queue. +%% As a result of the draining, any selective receive invoked inside a +%% callback is less likely to have to scan a large message queue. +%% +%% 3) gen_server2:cast is guaranteed to be order-preserving +%% The original code could reorder messages when communicating with a +%% process on a remote node that was not currently connected. +%% +%% 4) The callback module can optionally implement prioritise_call/3, +%% prioritise_cast/2 and prioritise_info/2. These functions take +%% Message, From and State or just Message and State and return a +%% single integer representing the priority attached to the message. +%% Messages with higher priorities are processed before requests with +%% lower priorities. The default priority is 0. +%% +%% 5) The callback module can optionally implement +%% handle_pre_hibernate/1 and handle_post_hibernate/1. These will be +%% called immediately prior to and post hibernation, respectively. If +%% handle_pre_hibernate returns {hibernate, NewState} then the process +%% will hibernate. If the module does not implement +%% handle_pre_hibernate/1 then the default action is to hibernate. +%% +%% 6) init can return a 4th arg, {backoff, InitialTimeout, +%% MinimumTimeout, DesiredHibernatePeriod} (all in milliseconds, +%% 'infinity' does not make sense here). Then, on all callbacks which +%% can return a timeout (including init), timeout can be +%% 'hibernate'. When this is the case, the current timeout value will +%% be used (initially, the InitialTimeout supplied from init). After +%% this timeout has occurred, hibernation will occur as normal. Upon +%% awaking, a new current timeout value will be calculated. +%% +%% The purpose is that the gen_server2 takes care of adjusting the +%% current timeout value such that the process will increase the +%% timeout value repeatedly if it is unable to sleep for the +%% DesiredHibernatePeriod. If it is able to sleep for the +%% DesiredHibernatePeriod it will decrease the current timeout down to +%% the MinimumTimeout, so that the process is put to sleep sooner (and +%% hopefully stays asleep for longer). In short, should a process +%% using this receive a burst of messages, it should not hibernate +%% between those messages, but as the messages become less frequent, +%% the process will not only hibernate, it will do so sooner after +%% each message. +%% +%% When using this backoff mechanism, normal timeout values (i.e. not +%% 'hibernate') can still be used, and if they are used then the +%% handle_info(timeout, State) will be called as normal. In this case, +%% returning 'hibernate' from handle_info(timeout, State) will not +%% hibernate the process immediately, as it would if backoff wasn't +%% being used. Instead it'll wait for the current timeout as described +%% above. +%% +%% 7) The callback module can return from any of the handle_* +%% functions, a {become, Module, State} triple, or a {become, Module, +%% State, Timeout} quadruple. This allows the gen_server to +%% dynamically change the callback module. The State is the new state +%% which will be passed into any of the callback functions in the new +%% module. Note there is no form also encompassing a reply, thus if +%% you wish to reply in handle_call/3 and change the callback module, +%% you need to use gen_server2:reply/2 to issue the reply manually. +%% +%% 8) The callback module can optionally implement +%% format_message_queue/2 which is the equivalent of format_status/2 +%% but where the second argument is specifically the priority_queue +%% which contains the prioritised message_queue. + +%% All modifications are (C) 2009-2012 VMware, Inc. + +%% ``The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved via the world wide web at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% The Initial Developer of the Original Code is Ericsson Utvecklings AB. +%% Portions created by Ericsson are Copyright 1999, Ericsson Utvecklings +%% AB. All Rights Reserved.'' +%% +%% $Id$ +%% +-module(gen_server2). + +%%% --------------------------------------------------- +%%% +%%% The idea behind THIS server is that the user module +%%% provides (different) functions to handle different +%%% kind of inputs. +%%% If the Parent process terminates the Module:terminate/2 +%%% function is called. +%%% +%%% The user module should export: +%%% +%%% init(Args) +%%% ==> {ok, State} +%%% {ok, State, Timeout} +%%% {ok, State, Timeout, Backoff} +%%% ignore +%%% {stop, Reason} +%%% +%%% handle_call(Msg, {From, Tag}, State) +%%% +%%% ==> {reply, Reply, State} +%%% {reply, Reply, State, Timeout} +%%% {noreply, State} +%%% {noreply, State, Timeout} +%%% {stop, Reason, Reply, State} +%%% Reason = normal | shutdown | Term terminate(State) is called +%%% +%%% handle_cast(Msg, State) +%%% +%%% ==> {noreply, State} +%%% {noreply, State, Timeout} +%%% {stop, Reason, State} +%%% Reason = normal | shutdown | Term terminate(State) is called +%%% +%%% handle_info(Info, State) Info is e.g. {'EXIT', P, R}, {nodedown, N}, ... +%%% +%%% ==> {noreply, State} +%%% {noreply, State, Timeout} +%%% {stop, Reason, State} +%%% Reason = normal | shutdown | Term, terminate(State) is called +%%% +%%% terminate(Reason, State) Let the user module clean up +%%% Reason = normal | shutdown | {shutdown, Term} | Term +%%% always called when server terminates +%%% +%%% ==> ok | Term +%%% +%%% handle_pre_hibernate(State) +%%% +%%% ==> {hibernate, State} +%%% {stop, Reason, State} +%%% Reason = normal | shutdown | Term, terminate(State) is called +%%% +%%% handle_post_hibernate(State) +%%% +%%% ==> {noreply, State} +%%% {stop, Reason, State} +%%% Reason = normal | shutdown | Term, terminate(State) is called +%%% +%%% The work flow (of the server) can be described as follows: +%%% +%%% User module Generic +%%% ----------- ------- +%%% start -----> start +%%% init <----- . +%%% +%%% loop +%%% handle_call <----- . +%%% -----> reply +%%% +%%% handle_cast <----- . +%%% +%%% handle_info <----- . +%%% +%%% terminate <----- . +%%% +%%% -----> reply +%%% +%%% +%%% --------------------------------------------------- + +%% API +-export([start/3, start/4, + start_link/3, start_link/4, + call/2, call/3, + cast/2, reply/2, + abcast/2, abcast/3, + multi_call/2, multi_call/3, multi_call/4, + enter_loop/3, enter_loop/4, enter_loop/5, enter_loop/6, wake_hib/1]). + +%% System exports +-export([system_continue/3, + system_terminate/4, + system_code_change/4, + format_status/2]). + +%% Internal exports +-export([init_it/6]). + +-import(error_logger, [format/2]). + +%% State record +-record(gs2_state, {parent, name, state, mod, time, + timeout_state, queue, debug, prioritise_call, + prioritise_cast, prioritise_info}). + +-ifdef(use_specs). + +%%%========================================================================= +%%% Specs. These exist only to shut up dialyzer's warnings +%%%========================================================================= + +-type(gs2_state() :: #gs2_state{}). + +-spec(handle_common_termination/3 :: + (any(), atom(), gs2_state()) -> no_return()). +-spec(hibernate/1 :: (gs2_state()) -> no_return()). +-spec(pre_hibernate/1 :: (gs2_state()) -> no_return()). +-spec(system_terminate/4 :: (_, _, _, gs2_state()) -> no_return()). + +-type(millis() :: non_neg_integer()). + +%%%========================================================================= +%%% API +%%%========================================================================= + +-callback init(Args :: term()) -> + {ok, State :: term()} | + {ok, State :: term(), timeout() | hibernate} | + {ok, State :: term(), timeout() | hibernate, + {backoff, millis(), millis(), millis()}} | + ignore | + {stop, Reason :: term()}. +-callback handle_call(Request :: term(), From :: {pid(), Tag :: term()}, + State :: term()) -> + {reply, Reply :: term(), NewState :: term()} | + {reply, Reply :: term(), NewState :: term(), timeout() | hibernate} | + {noreply, NewState :: term()} | + {noreply, NewState :: term(), timeout() | hibernate} | + {stop, Reason :: term(), + Reply :: term(), NewState :: term()}. +-callback handle_cast(Request :: term(), State :: term()) -> + {noreply, NewState :: term()} | + {noreply, NewState :: term(), timeout() | hibernate} | + {stop, Reason :: term(), NewState :: term()}. +-callback handle_info(Info :: term(), State :: term()) -> + {noreply, NewState :: term()} | + {noreply, NewState :: term(), timeout() | hibernate} | + {stop, Reason :: term(), NewState :: term()}. +-callback terminate(Reason :: (normal | shutdown | {shutdown, term()} | term()), + State :: term()) -> + ok | term(). +-callback code_change(OldVsn :: (term() | {down, term()}), State :: term(), + Extra :: term()) -> + {ok, NewState :: term()} | {error, Reason :: term()}. + +%% It's not possible to define "optional" -callbacks, so putting specs +%% for handle_pre_hibernate/1 and handle_post_hibernate/1 will result +%% in warnings (the same applied for the behaviour_info before). + +-else. + +-export([behaviour_info/1]). + +behaviour_info(callbacks) -> + [{init,1},{handle_call,3},{handle_cast,2},{handle_info,2}, + {terminate,2},{code_change,3}]; +behaviour_info(_Other) -> + undefined. + +-endif. + +%%% ----------------------------------------------------------------- +%%% Starts a generic server. +%%% start(Mod, Args, Options) +%%% start(Name, Mod, Args, Options) +%%% start_link(Mod, Args, Options) +%%% start_link(Name, Mod, Args, Options) where: +%%% Name ::= {local, atom()} | {global, atom()} +%%% Mod ::= atom(), callback module implementing the 'real' server +%%% Args ::= term(), init arguments (to Mod:init/1) +%%% Options ::= [{timeout, Timeout} | {debug, [Flag]}] +%%% Flag ::= trace | log | {logfile, File} | statistics | debug +%%% (debug == log && statistics) +%%% Returns: {ok, Pid} | +%%% {error, {already_started, Pid}} | +%%% {error, Reason} +%%% ----------------------------------------------------------------- +start(Mod, Args, Options) -> + gen:start(?MODULE, nolink, Mod, Args, Options). + +start(Name, Mod, Args, Options) -> + gen:start(?MODULE, nolink, Name, Mod, Args, Options). + +start_link(Mod, Args, Options) -> + gen:start(?MODULE, link, Mod, Args, Options). + +start_link(Name, Mod, Args, Options) -> + gen:start(?MODULE, link, Name, Mod, Args, Options). + + +%% ----------------------------------------------------------------- +%% Make a call to a generic server. +%% If the server is located at another node, that node will +%% be monitored. +%% If the client is trapping exits and is linked server termination +%% is handled here (? Shall we do that here (or rely on timeouts) ?). +%% ----------------------------------------------------------------- +call(Name, Request) -> + case catch gen:call(Name, '$gen_call', Request) of + {ok,Res} -> + Res; + {'EXIT',Reason} -> + exit({Reason, {?MODULE, call, [Name, Request]}}) + end. + +call(Name, Request, Timeout) -> + case catch gen:call(Name, '$gen_call', Request, Timeout) of + {ok,Res} -> + Res; + {'EXIT',Reason} -> + exit({Reason, {?MODULE, call, [Name, Request, Timeout]}}) + end. + +%% ----------------------------------------------------------------- +%% Make a cast to a generic server. +%% ----------------------------------------------------------------- +cast({global,Name}, Request) -> + catch global:send(Name, cast_msg(Request)), + ok; +cast({Name,Node}=Dest, Request) when is_atom(Name), is_atom(Node) -> + do_cast(Dest, Request); +cast(Dest, Request) when is_atom(Dest) -> + do_cast(Dest, Request); +cast(Dest, Request) when is_pid(Dest) -> + do_cast(Dest, Request). + +do_cast(Dest, Request) -> + do_send(Dest, cast_msg(Request)), + ok. + +cast_msg(Request) -> {'$gen_cast',Request}. + +%% ----------------------------------------------------------------- +%% Send a reply to the client. +%% ----------------------------------------------------------------- +reply({To, Tag}, Reply) -> + catch To ! {Tag, Reply}. + +%% ----------------------------------------------------------------- +%% Asyncronous broadcast, returns nothing, it's just send'n pray +%% ----------------------------------------------------------------- +abcast(Name, Request) when is_atom(Name) -> + do_abcast([node() | nodes()], Name, cast_msg(Request)). + +abcast(Nodes, Name, Request) when is_list(Nodes), is_atom(Name) -> + do_abcast(Nodes, Name, cast_msg(Request)). + +do_abcast([Node|Nodes], Name, Msg) when is_atom(Node) -> + do_send({Name,Node},Msg), + do_abcast(Nodes, Name, Msg); +do_abcast([], _,_) -> abcast. + +%%% ----------------------------------------------------------------- +%%% Make a call to servers at several nodes. +%%% Returns: {[Replies],[BadNodes]} +%%% A Timeout can be given +%%% +%%% A middleman process is used in case late answers arrives after +%%% the timeout. If they would be allowed to glog the callers message +%%% queue, it would probably become confused. Late answers will +%%% now arrive to the terminated middleman and so be discarded. +%%% ----------------------------------------------------------------- +multi_call(Name, Req) + when is_atom(Name) -> + do_multi_call([node() | nodes()], Name, Req, infinity). + +multi_call(Nodes, Name, Req) + when is_list(Nodes), is_atom(Name) -> + do_multi_call(Nodes, Name, Req, infinity). + +multi_call(Nodes, Name, Req, infinity) -> + do_multi_call(Nodes, Name, Req, infinity); +multi_call(Nodes, Name, Req, Timeout) + when is_list(Nodes), is_atom(Name), is_integer(Timeout), Timeout >= 0 -> + do_multi_call(Nodes, Name, Req, Timeout). + + +%%----------------------------------------------------------------- +%% enter_loop(Mod, Options, State, , , ) ->_ +%% +%% Description: Makes an existing process into a gen_server. +%% The calling process will enter the gen_server receive +%% loop and become a gen_server process. +%% The process *must* have been started using one of the +%% start functions in proc_lib, see proc_lib(3). +%% The user is responsible for any initialization of the +%% process, including registering a name for it. +%%----------------------------------------------------------------- +enter_loop(Mod, Options, State) -> + enter_loop(Mod, Options, State, self(), infinity, undefined). + +enter_loop(Mod, Options, State, Backoff = {backoff, _, _ , _}) -> + enter_loop(Mod, Options, State, self(), infinity, Backoff); + +enter_loop(Mod, Options, State, ServerName = {_, _}) -> + enter_loop(Mod, Options, State, ServerName, infinity, undefined); + +enter_loop(Mod, Options, State, Timeout) -> + enter_loop(Mod, Options, State, self(), Timeout, undefined). + +enter_loop(Mod, Options, State, ServerName, Backoff = {backoff, _, _, _}) -> + enter_loop(Mod, Options, State, ServerName, infinity, Backoff); + +enter_loop(Mod, Options, State, ServerName, Timeout) -> + enter_loop(Mod, Options, State, ServerName, Timeout, undefined). + +enter_loop(Mod, Options, State, ServerName, Timeout, Backoff) -> + Name = get_proc_name(ServerName), + Parent = get_parent(), + Debug = debug_options(Name, Options), + Queue = priority_queue:new(), + Backoff1 = extend_backoff(Backoff), + loop(find_prioritisers( + #gs2_state { parent = Parent, name = Name, state = State, + mod = Mod, time = Timeout, timeout_state = Backoff1, + queue = Queue, debug = Debug })). + +%%%======================================================================== +%%% Gen-callback functions +%%%======================================================================== + +%%% --------------------------------------------------- +%%% Initiate the new process. +%%% Register the name using the Rfunc function +%%% Calls the Mod:init/Args function. +%%% Finally an acknowledge is sent to Parent and the main +%%% loop is entered. +%%% --------------------------------------------------- +init_it(Starter, self, Name, Mod, Args, Options) -> + init_it(Starter, self(), Name, Mod, Args, Options); +init_it(Starter, Parent, Name0, Mod, Args, Options) -> + Name = name(Name0), + Debug = debug_options(Name, Options), + Queue = priority_queue:new(), + GS2State = find_prioritisers( + #gs2_state { parent = Parent, + name = Name, + mod = Mod, + queue = Queue, + debug = Debug }), + case catch Mod:init(Args) of + {ok, State} -> + proc_lib:init_ack(Starter, {ok, self()}), + loop(GS2State #gs2_state { state = State, + time = infinity, + timeout_state = undefined }); + {ok, State, Timeout} -> + proc_lib:init_ack(Starter, {ok, self()}), + loop(GS2State #gs2_state { state = State, + time = Timeout, + timeout_state = undefined }); + {ok, State, Timeout, Backoff = {backoff, _, _, _}} -> + Backoff1 = extend_backoff(Backoff), + proc_lib:init_ack(Starter, {ok, self()}), + loop(GS2State #gs2_state { state = State, + time = Timeout, + timeout_state = Backoff1 }); + {stop, Reason} -> + %% For consistency, we must make sure that the + %% registered name (if any) is unregistered before + %% the parent process is notified about the failure. + %% (Otherwise, the parent process could get + %% an 'already_started' error if it immediately + %% tried starting the process again.) + unregister_name(Name0), + proc_lib:init_ack(Starter, {error, Reason}), + exit(Reason); + ignore -> + unregister_name(Name0), + proc_lib:init_ack(Starter, ignore), + exit(normal); + {'EXIT', Reason} -> + unregister_name(Name0), + proc_lib:init_ack(Starter, {error, Reason}), + exit(Reason); + Else -> + Error = {bad_return_value, Else}, + proc_lib:init_ack(Starter, {error, Error}), + exit(Error) + end. + +name({local,Name}) -> Name; +name({global,Name}) -> Name; +%% name(Pid) when is_pid(Pid) -> Pid; +%% when R12 goes away, drop the line beneath and uncomment the line above +name(Name) -> Name. + +unregister_name({local,Name}) -> + _ = (catch unregister(Name)); +unregister_name({global,Name}) -> + _ = global:unregister_name(Name); +unregister_name(Pid) when is_pid(Pid) -> + Pid; +%% Under R12 let's just ignore it, as we have a single term as Name. +%% On R13 it will never get here, as we get tuple with 'local/global' atom. +unregister_name(_Name) -> ok. + +extend_backoff(undefined) -> + undefined; +extend_backoff({backoff, InitialTimeout, MinimumTimeout, DesiredHibPeriod}) -> + {backoff, InitialTimeout, MinimumTimeout, DesiredHibPeriod, now()}. + +%%%======================================================================== +%%% Internal functions +%%%======================================================================== +%%% --------------------------------------------------- +%%% The MAIN loop. +%%% --------------------------------------------------- +loop(GS2State = #gs2_state { time = hibernate, + timeout_state = undefined }) -> + pre_hibernate(GS2State); +loop(GS2State) -> + process_next_msg(drain(GS2State)). + +drain(GS2State) -> + receive + Input -> drain(in(Input, GS2State)) + after 0 -> GS2State + end. + +process_next_msg(GS2State = #gs2_state { time = Time, + timeout_state = TimeoutState, + queue = Queue }) -> + case priority_queue:out(Queue) of + {{value, Msg}, Queue1} -> + process_msg(Msg, GS2State #gs2_state { queue = Queue1 }); + {empty, Queue1} -> + {Time1, HibOnTimeout} + = case {Time, TimeoutState} of + {hibernate, {backoff, Current, _Min, _Desired, _RSt}} -> + {Current, true}; + {hibernate, _} -> + %% wake_hib/7 will set Time to hibernate. If + %% we were woken and didn't receive a msg + %% then we will get here and need a sensible + %% value for Time1, otherwise we crash. + %% R13B1 always waits infinitely when waking + %% from hibernation, so that's what we do + %% here too. + {infinity, false}; + _ -> {Time, false} + end, + receive + Input -> + %% Time could be 'hibernate' here, so *don't* call loop + process_next_msg( + drain(in(Input, GS2State #gs2_state { queue = Queue1 }))) + after Time1 -> + case HibOnTimeout of + true -> + pre_hibernate( + GS2State #gs2_state { queue = Queue1 }); + false -> + process_msg(timeout, + GS2State #gs2_state { queue = Queue1 }) + end + end + end. + +wake_hib(GS2State = #gs2_state { timeout_state = TS }) -> + TimeoutState1 = case TS of + undefined -> + undefined; + {SleptAt, TimeoutState} -> + adjust_timeout_state(SleptAt, now(), TimeoutState) + end, + post_hibernate( + drain(GS2State #gs2_state { timeout_state = TimeoutState1 })). + +hibernate(GS2State = #gs2_state { timeout_state = TimeoutState }) -> + TS = case TimeoutState of + undefined -> undefined; + {backoff, _, _, _, _} -> {now(), TimeoutState} + end, + proc_lib:hibernate(?MODULE, wake_hib, + [GS2State #gs2_state { timeout_state = TS }]). + +pre_hibernate(GS2State = #gs2_state { state = State, + mod = Mod }) -> + case erlang:function_exported(Mod, handle_pre_hibernate, 1) of + true -> + case catch Mod:handle_pre_hibernate(State) of + {hibernate, NState} -> + hibernate(GS2State #gs2_state { state = NState } ); + Reply -> + handle_common_termination(Reply, pre_hibernate, GS2State) + end; + false -> + hibernate(GS2State) + end. + +post_hibernate(GS2State = #gs2_state { state = State, + mod = Mod }) -> + case erlang:function_exported(Mod, handle_post_hibernate, 1) of + true -> + case catch Mod:handle_post_hibernate(State) of + {noreply, NState} -> + process_next_msg(GS2State #gs2_state { state = NState, + time = infinity }); + {noreply, NState, Time} -> + process_next_msg(GS2State #gs2_state { state = NState, + time = Time }); + Reply -> + handle_common_termination(Reply, post_hibernate, GS2State) + end; + false -> + %% use hibernate here, not infinity. This matches + %% R13B. The key is that we should be able to get through + %% to process_msg calling sys:handle_system_msg with Time + %% still set to hibernate, iff that msg is the very msg + %% that woke us up (or the first msg we receive after + %% waking up). + process_next_msg(GS2State #gs2_state { time = hibernate }) + end. + +adjust_timeout_state(SleptAt, AwokeAt, {backoff, CurrentTO, MinimumTO, + DesiredHibPeriod, RandomState}) -> + NapLengthMicros = timer:now_diff(AwokeAt, SleptAt), + CurrentMicros = CurrentTO * 1000, + MinimumMicros = MinimumTO * 1000, + DesiredHibMicros = DesiredHibPeriod * 1000, + GapBetweenMessagesMicros = NapLengthMicros + CurrentMicros, + Base = + %% If enough time has passed between the last two messages then we + %% should consider sleeping sooner. Otherwise stay awake longer. + case GapBetweenMessagesMicros > (MinimumMicros + DesiredHibMicros) of + true -> lists:max([MinimumTO, CurrentTO div 2]); + false -> CurrentTO + end, + {Extra, RandomState1} = random:uniform_s(Base, RandomState), + CurrentTO1 = Base + Extra, + {backoff, CurrentTO1, MinimumTO, DesiredHibPeriod, RandomState1}. + +in({'$gen_cast', Msg} = Input, + GS2State = #gs2_state { prioritise_cast = PC }) -> + in(Input, PC(Msg, GS2State), GS2State); +in({'$gen_call', From, Msg} = Input, + GS2State = #gs2_state { prioritise_call = PC }) -> + in(Input, PC(Msg, From, GS2State), GS2State); +in({'EXIT', Parent, _R} = Input, GS2State = #gs2_state { parent = Parent }) -> + in(Input, infinity, GS2State); +in({system, _From, _Req} = Input, GS2State) -> + in(Input, infinity, GS2State); +in(Input, GS2State = #gs2_state { prioritise_info = PI }) -> + in(Input, PI(Input, GS2State), GS2State). + +in(Input, Priority, GS2State = #gs2_state { queue = Queue }) -> + GS2State # gs2_state { queue = priority_queue:in(Input, Priority, Queue) }. + +process_msg({system, From, Req}, + GS2State = #gs2_state { parent = Parent, debug = Debug }) -> + %% gen_server puts Hib on the end as the 7th arg, but that version + %% of the fun seems not to be documented so leaving out for now. + sys:handle_system_msg(Req, From, Parent, ?MODULE, Debug, GS2State); +process_msg({'EXIT', Parent, Reason} = Msg, + GS2State = #gs2_state { parent = Parent }) -> + terminate(Reason, Msg, GS2State); +process_msg(Msg, GS2State = #gs2_state { debug = [] }) -> + handle_msg(Msg, GS2State); +process_msg(Msg, GS2State = #gs2_state { name = Name, debug = Debug }) -> + Debug1 = sys:handle_debug(Debug, fun print_event/3, Name, {in, Msg}), + handle_msg(Msg, GS2State #gs2_state { debug = Debug1 }). + +%%% --------------------------------------------------- +%%% Send/recive functions +%%% --------------------------------------------------- +do_send(Dest, Msg) -> + catch erlang:send(Dest, Msg). + +do_multi_call(Nodes, Name, Req, infinity) -> + Tag = make_ref(), + Monitors = send_nodes(Nodes, Name, Tag, Req), + rec_nodes(Tag, Monitors, Name, undefined); +do_multi_call(Nodes, Name, Req, Timeout) -> + Tag = make_ref(), + Caller = self(), + Receiver = + spawn( + fun () -> + %% Middleman process. Should be unsensitive to regular + %% exit signals. The sychronization is needed in case + %% the receiver would exit before the caller started + %% the monitor. + process_flag(trap_exit, true), + Mref = erlang:monitor(process, Caller), + receive + {Caller,Tag} -> + Monitors = send_nodes(Nodes, Name, Tag, Req), + TimerId = erlang:start_timer(Timeout, self(), ok), + Result = rec_nodes(Tag, Monitors, Name, TimerId), + exit({self(),Tag,Result}); + {'DOWN',Mref,_,_,_} -> + %% Caller died before sending us the go-ahead. + %% Give up silently. + exit(normal) + end + end), + Mref = erlang:monitor(process, Receiver), + Receiver ! {self(),Tag}, + receive + {'DOWN',Mref,_,_,{Receiver,Tag,Result}} -> + Result; + {'DOWN',Mref,_,_,Reason} -> + %% The middleman code failed. Or someone did + %% exit(_, kill) on the middleman process => Reason==killed + exit(Reason) + end. + +send_nodes(Nodes, Name, Tag, Req) -> + send_nodes(Nodes, Name, Tag, Req, []). + +send_nodes([Node|Tail], Name, Tag, Req, Monitors) + when is_atom(Node) -> + Monitor = start_monitor(Node, Name), + %% Handle non-existing names in rec_nodes. + catch {Name, Node} ! {'$gen_call', {self(), {Tag, Node}}, Req}, + send_nodes(Tail, Name, Tag, Req, [Monitor | Monitors]); +send_nodes([_Node|Tail], Name, Tag, Req, Monitors) -> + %% Skip non-atom Node + send_nodes(Tail, Name, Tag, Req, Monitors); +send_nodes([], _Name, _Tag, _Req, Monitors) -> + Monitors. + +%% Against old nodes: +%% If no reply has been delivered within 2 secs. (per node) check that +%% the server really exists and wait for ever for the answer. +%% +%% Against contemporary nodes: +%% Wait for reply, server 'DOWN', or timeout from TimerId. + +rec_nodes(Tag, Nodes, Name, TimerId) -> + rec_nodes(Tag, Nodes, Name, [], [], 2000, TimerId). + +rec_nodes(Tag, [{N,R}|Tail], Name, Badnodes, Replies, Time, TimerId ) -> + receive + {'DOWN', R, _, _, _} -> + rec_nodes(Tag, Tail, Name, [N|Badnodes], Replies, Time, TimerId); + {{Tag, N}, Reply} -> %% Tag is bound !!! + unmonitor(R), + rec_nodes(Tag, Tail, Name, Badnodes, + [{N,Reply}|Replies], Time, TimerId); + {timeout, TimerId, _} -> + unmonitor(R), + %% Collect all replies that already have arrived + rec_nodes_rest(Tag, Tail, Name, [N|Badnodes], Replies) + end; +rec_nodes(Tag, [N|Tail], Name, Badnodes, Replies, Time, TimerId) -> + %% R6 node + receive + {nodedown, N} -> + monitor_node(N, false), + rec_nodes(Tag, Tail, Name, [N|Badnodes], Replies, 2000, TimerId); + {{Tag, N}, Reply} -> %% Tag is bound !!! + receive {nodedown, N} -> ok after 0 -> ok end, + monitor_node(N, false), + rec_nodes(Tag, Tail, Name, Badnodes, + [{N,Reply}|Replies], 2000, TimerId); + {timeout, TimerId, _} -> + receive {nodedown, N} -> ok after 0 -> ok end, + monitor_node(N, false), + %% Collect all replies that already have arrived + rec_nodes_rest(Tag, Tail, Name, [N | Badnodes], Replies) + after Time -> + case rpc:call(N, erlang, whereis, [Name]) of + Pid when is_pid(Pid) -> % It exists try again. + rec_nodes(Tag, [N|Tail], Name, Badnodes, + Replies, infinity, TimerId); + _ -> % badnode + receive {nodedown, N} -> ok after 0 -> ok end, + monitor_node(N, false), + rec_nodes(Tag, Tail, Name, [N|Badnodes], + Replies, 2000, TimerId) + end + end; +rec_nodes(_, [], _, Badnodes, Replies, _, TimerId) -> + case catch erlang:cancel_timer(TimerId) of + false -> % It has already sent it's message + receive + {timeout, TimerId, _} -> ok + after 0 -> + ok + end; + _ -> % Timer was cancelled, or TimerId was 'undefined' + ok + end, + {Replies, Badnodes}. + +%% Collect all replies that already have arrived +rec_nodes_rest(Tag, [{N,R}|Tail], Name, Badnodes, Replies) -> + receive + {'DOWN', R, _, _, _} -> + rec_nodes_rest(Tag, Tail, Name, [N|Badnodes], Replies); + {{Tag, N}, Reply} -> %% Tag is bound !!! + unmonitor(R), + rec_nodes_rest(Tag, Tail, Name, Badnodes, [{N,Reply}|Replies]) + after 0 -> + unmonitor(R), + rec_nodes_rest(Tag, Tail, Name, [N|Badnodes], Replies) + end; +rec_nodes_rest(Tag, [N|Tail], Name, Badnodes, Replies) -> + %% R6 node + receive + {nodedown, N} -> + monitor_node(N, false), + rec_nodes_rest(Tag, Tail, Name, [N|Badnodes], Replies); + {{Tag, N}, Reply} -> %% Tag is bound !!! + receive {nodedown, N} -> ok after 0 -> ok end, + monitor_node(N, false), + rec_nodes_rest(Tag, Tail, Name, Badnodes, [{N,Reply}|Replies]) + after 0 -> + receive {nodedown, N} -> ok after 0 -> ok end, + monitor_node(N, false), + rec_nodes_rest(Tag, Tail, Name, [N|Badnodes], Replies) + end; +rec_nodes_rest(_Tag, [], _Name, Badnodes, Replies) -> + {Replies, Badnodes}. + + +%%% --------------------------------------------------- +%%% Monitor functions +%%% --------------------------------------------------- + +start_monitor(Node, Name) when is_atom(Node), is_atom(Name) -> + if node() =:= nonode@nohost, Node =/= nonode@nohost -> + Ref = make_ref(), + self() ! {'DOWN', Ref, process, {Name, Node}, noconnection}, + {Node, Ref}; + true -> + case catch erlang:monitor(process, {Name, Node}) of + {'EXIT', _} -> + %% Remote node is R6 + monitor_node(Node, true), + Node; + Ref when is_reference(Ref) -> + {Node, Ref} + end + end. + +%% Cancels a monitor started with Ref=erlang:monitor(_, _). +unmonitor(Ref) when is_reference(Ref) -> + erlang:demonitor(Ref), + receive + {'DOWN', Ref, _, _, _} -> + true + after 0 -> + true + end. + +%%% --------------------------------------------------- +%%% Message handling functions +%%% --------------------------------------------------- + +dispatch({'$gen_cast', Msg}, Mod, State) -> + Mod:handle_cast(Msg, State); +dispatch(Info, Mod, State) -> + Mod:handle_info(Info, State). + +common_reply(_Name, From, Reply, _NState, [] = _Debug) -> + reply(From, Reply), + []; +common_reply(Name, From, Reply, NState, Debug) -> + reply(Name, From, Reply, NState, Debug). + +common_debug([] = _Debug, _Func, _Info, _Event) -> + []; +common_debug(Debug, Func, Info, Event) -> + sys:handle_debug(Debug, Func, Info, Event). + +handle_msg({'$gen_call', From, Msg}, GS2State = #gs2_state { mod = Mod, + state = State, + name = Name, + debug = Debug }) -> + case catch Mod:handle_call(Msg, From, State) of + {reply, Reply, NState} -> + Debug1 = common_reply(Name, From, Reply, NState, Debug), + loop(GS2State #gs2_state { state = NState, + time = infinity, + debug = Debug1 }); + {reply, Reply, NState, Time1} -> + Debug1 = common_reply(Name, From, Reply, NState, Debug), + loop(GS2State #gs2_state { state = NState, + time = Time1, + debug = Debug1}); + {noreply, NState} -> + Debug1 = common_debug(Debug, fun print_event/3, Name, + {noreply, NState}), + loop(GS2State #gs2_state {state = NState, + time = infinity, + debug = Debug1}); + {noreply, NState, Time1} -> + Debug1 = common_debug(Debug, fun print_event/3, Name, + {noreply, NState}), + loop(GS2State #gs2_state {state = NState, + time = Time1, + debug = Debug1}); + {stop, Reason, Reply, NState} -> + {'EXIT', R} = + (catch terminate(Reason, Msg, + GS2State #gs2_state { state = NState })), + reply(Name, From, Reply, NState, Debug), + exit(R); + Other -> + handle_common_reply(Other, Msg, GS2State) + end; +handle_msg(Msg, GS2State = #gs2_state { mod = Mod, state = State }) -> + Reply = (catch dispatch(Msg, Mod, State)), + handle_common_reply(Reply, Msg, GS2State). + +handle_common_reply(Reply, Msg, GS2State = #gs2_state { name = Name, + debug = Debug}) -> + case Reply of + {noreply, NState} -> + Debug1 = common_debug(Debug, fun print_event/3, Name, + {noreply, NState}), + loop(GS2State #gs2_state { state = NState, + time = infinity, + debug = Debug1 }); + {noreply, NState, Time1} -> + Debug1 = common_debug(Debug, fun print_event/3, Name, + {noreply, NState}), + loop(GS2State #gs2_state { state = NState, + time = Time1, + debug = Debug1 }); + {become, Mod, NState} -> + Debug1 = common_debug(Debug, fun print_event/3, Name, + {become, Mod, NState}), + loop(find_prioritisers( + GS2State #gs2_state { mod = Mod, + state = NState, + time = infinity, + debug = Debug1 })); + {become, Mod, NState, Time1} -> + Debug1 = common_debug(Debug, fun print_event/3, Name, + {become, Mod, NState}), + loop(find_prioritisers( + GS2State #gs2_state { mod = Mod, + state = NState, + time = Time1, + debug = Debug1 })); + _ -> + handle_common_termination(Reply, Msg, GS2State) + end. + +handle_common_termination(Reply, Msg, GS2State) -> + case Reply of + {stop, Reason, NState} -> + terminate(Reason, Msg, GS2State #gs2_state { state = NState }); + {'EXIT', What} -> + terminate(What, Msg, GS2State); + _ -> + terminate({bad_return_value, Reply}, Msg, GS2State) + end. + +reply(Name, {To, Tag}, Reply, State, Debug) -> + reply({To, Tag}, Reply), + sys:handle_debug( + Debug, fun print_event/3, Name, {out, Reply, To, State}). + + +%%----------------------------------------------------------------- +%% Callback functions for system messages handling. +%%----------------------------------------------------------------- +system_continue(Parent, Debug, GS2State) -> + loop(GS2State #gs2_state { parent = Parent, debug = Debug }). + +system_terminate(Reason, _Parent, Debug, GS2State) -> + terminate(Reason, [], GS2State #gs2_state { debug = Debug }). + +system_code_change(GS2State = #gs2_state { mod = Mod, + state = State }, + _Module, OldVsn, Extra) -> + case catch Mod:code_change(OldVsn, State, Extra) of + {ok, NewState} -> + NewGS2State = find_prioritisers( + GS2State #gs2_state { state = NewState }), + {ok, [NewGS2State]}; + Else -> + Else + end. + +%%----------------------------------------------------------------- +%% Format debug messages. Print them as the call-back module sees +%% them, not as the real erlang messages. Use trace for that. +%%----------------------------------------------------------------- +print_event(Dev, {in, Msg}, Name) -> + case Msg of + {'$gen_call', {From, _Tag}, Call} -> + io:format(Dev, "*DBG* ~p got call ~p from ~w~n", + [Name, Call, From]); + {'$gen_cast', Cast} -> + io:format(Dev, "*DBG* ~p got cast ~p~n", + [Name, Cast]); + _ -> + io:format(Dev, "*DBG* ~p got ~p~n", [Name, Msg]) + end; +print_event(Dev, {out, Msg, To, State}, Name) -> + io:format(Dev, "*DBG* ~p sent ~p to ~w, new state ~w~n", + [Name, Msg, To, State]); +print_event(Dev, {noreply, State}, Name) -> + io:format(Dev, "*DBG* ~p new state ~w~n", [Name, State]); +print_event(Dev, Event, Name) -> + io:format(Dev, "*DBG* ~p dbg ~p~n", [Name, Event]). + + +%%% --------------------------------------------------- +%%% Terminate the server. +%%% --------------------------------------------------- + +terminate(Reason, Msg, #gs2_state { name = Name, + mod = Mod, + state = State, + debug = Debug }) -> + case catch Mod:terminate(Reason, State) of + {'EXIT', R} -> + error_info(R, Reason, Name, Msg, State, Debug), + exit(R); + _ -> + case Reason of + normal -> + exit(normal); + shutdown -> + exit(shutdown); + {shutdown,_}=Shutdown -> + exit(Shutdown); + _ -> + error_info(Reason, undefined, Name, Msg, State, Debug), + exit(Reason) + end + end. + +error_info(_Reason, _RootCause, application_controller, _Msg, _State, _Debug) -> + %% OTP-5811 Don't send an error report if it's the system process + %% application_controller which is terminating - let init take care + %% of it instead + ok; +error_info(Reason, RootCause, Name, Msg, State, Debug) -> + Reason1 = error_reason(Reason), + Fmt = + "** Generic server ~p terminating~n" + "** Last message in was ~p~n" + "** When Server state == ~p~n" + "** Reason for termination == ~n** ~p~n", + case RootCause of + undefined -> format(Fmt, [Name, Msg, State, Reason1]); + _ -> format(Fmt ++ "** In 'terminate' callback " + "with reason ==~n** ~p~n", + [Name, Msg, State, Reason1, + error_reason(RootCause)]) + end, + sys:print_log(Debug), + ok. + +error_reason({undef,[{M,F,A}|MFAs]} = Reason) -> + case code:is_loaded(M) of + false -> {'module could not be loaded',[{M,F,A}|MFAs]}; + _ -> case erlang:function_exported(M, F, length(A)) of + true -> Reason; + false -> {'function not exported',[{M,F,A}|MFAs]} + end + end; +error_reason(Reason) -> + Reason. + +%%% --------------------------------------------------- +%%% Misc. functions. +%%% --------------------------------------------------- + +opt(Op, [{Op, Value}|_]) -> + {ok, Value}; +opt(Op, [_|Options]) -> + opt(Op, Options); +opt(_, []) -> + false. + +debug_options(Name, Opts) -> + case opt(debug, Opts) of + {ok, Options} -> dbg_options(Name, Options); + _ -> dbg_options(Name, []) + end. + +dbg_options(Name, []) -> + Opts = + case init:get_argument(generic_debug) of + error -> + []; + _ -> + [log, statistics] + end, + dbg_opts(Name, Opts); +dbg_options(Name, Opts) -> + dbg_opts(Name, Opts). + +dbg_opts(Name, Opts) -> + case catch sys:debug_options(Opts) of + {'EXIT',_} -> + format("~p: ignoring erroneous debug options - ~p~n", + [Name, Opts]), + []; + Dbg -> + Dbg + end. + +get_proc_name(Pid) when is_pid(Pid) -> + Pid; +get_proc_name({local, Name}) -> + case process_info(self(), registered_name) of + {registered_name, Name} -> + Name; + {registered_name, _Name} -> + exit(process_not_registered); + [] -> + exit(process_not_registered) + end; +get_proc_name({global, Name}) -> + case whereis_name(Name) of + undefined -> + exit(process_not_registered_globally); + Pid when Pid =:= self() -> + Name; + _Pid -> + exit(process_not_registered_globally) + end. + +get_parent() -> + case get('$ancestors') of + [Parent | _] when is_pid(Parent)-> + Parent; + [Parent | _] when is_atom(Parent)-> + name_to_pid(Parent); + _ -> + exit(process_was_not_started_by_proc_lib) + end. + +name_to_pid(Name) -> + case whereis(Name) of + undefined -> + case whereis_name(Name) of + undefined -> + exit(could_not_find_registerd_name); + Pid -> + Pid + end; + Pid -> + Pid + end. + +whereis_name(Name) -> + case ets:lookup(global_names, Name) of + [{_Name, Pid, _Method, _RPid, _Ref}] -> + if node(Pid) == node() -> + case is_process_alive(Pid) of + true -> Pid; + false -> undefined + end; + true -> + Pid + end; + [] -> undefined + end. + +find_prioritisers(GS2State = #gs2_state { mod = Mod }) -> + PrioriCall = function_exported_or_default( + Mod, 'prioritise_call', 3, + fun (_Msg, _From, _State) -> 0 end), + PrioriCast = function_exported_or_default(Mod, 'prioritise_cast', 2, + fun (_Msg, _State) -> 0 end), + PrioriInfo = function_exported_or_default(Mod, 'prioritise_info', 2, + fun (_Msg, _State) -> 0 end), + GS2State #gs2_state { prioritise_call = PrioriCall, + prioritise_cast = PrioriCast, + prioritise_info = PrioriInfo }. + +function_exported_or_default(Mod, Fun, Arity, Default) -> + case erlang:function_exported(Mod, Fun, Arity) of + true -> case Arity of + 2 -> fun (Msg, GS2State = #gs2_state { state = State }) -> + case catch Mod:Fun(Msg, State) of + Res when is_integer(Res) -> + Res; + Err -> + handle_common_termination(Err, Msg, GS2State) + end + end; + 3 -> fun (Msg, From, GS2State = #gs2_state { state = State }) -> + case catch Mod:Fun(Msg, From, State) of + Res when is_integer(Res) -> + Res; + Err -> + handle_common_termination(Err, Msg, GS2State) + end + end + end; + false -> Default + end. + +%%----------------------------------------------------------------- +%% Status information +%%----------------------------------------------------------------- +format_status(Opt, StatusData) -> + [PDict, SysState, Parent, Debug, + #gs2_state{name = Name, state = State, mod = Mod, queue = Queue}] = + StatusData, + NameTag = if is_pid(Name) -> + pid_to_list(Name); + is_atom(Name) -> + Name + end, + Header = lists:concat(["Status for generic server ", NameTag]), + Log = sys:get_debug(log, Debug, []), + Specfic = callback(Mod, format_status, [Opt, [PDict, State]], + fun () -> [{data, [{"State", State}]}] end), + Messages = callback(Mod, format_message_queue, [Opt, Queue], + fun () -> priority_queue:to_list(Queue) end), + [{header, Header}, + {data, [{"Status", SysState}, + {"Parent", Parent}, + {"Logged events", Log}, + {"Queued messages", Messages}]} | + Specfic]. + +callback(Mod, FunName, Args, DefaultThunk) -> + case erlang:function_exported(Mod, FunName, length(Args)) of + true -> case catch apply(Mod, FunName, Args) of + {'EXIT', _} -> DefaultThunk(); + Success -> Success + end; + false -> DefaultThunk() + end. diff --git a/src/priority_queue.erl b/src/priority_queue.erl new file mode 100644 index 000000000..780fa2e92 --- /dev/null +++ b/src/priority_queue.erl @@ -0,0 +1,194 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License +%% at http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and +%% limitations under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developer of the Original Code is VMware, Inc. +%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% + +%% Priority queues have essentially the same interface as ordinary +%% queues, except that a) there is an in/3 that takes a priority, and +%% b) we have only implemented the core API we need. +%% +%% Priorities should be integers - the higher the value the higher the +%% priority - but we don't actually check that. +%% +%% in/2 inserts items with priority 0. +%% +%% We optimise the case where a priority queue is being used just like +%% an ordinary queue. When that is the case we represent the priority +%% queue as an ordinary queue. We could just call into the 'queue' +%% module for that, but for efficiency we implement the relevant +%% functions directly in here, thus saving on inter-module calls and +%% eliminating a level of boxing. +%% +%% When the queue contains items with non-zero priorities, it is +%% represented as a sorted kv list with the inverted Priority as the +%% key and an ordinary queue as the value. Here again we use our own +%% ordinary queue implemention for efficiency, often making recursive +%% calls into the same function knowing that ordinary queues represent +%% a base case. + + +-module(priority_queue). + +-export([new/0, is_queue/1, is_empty/1, len/1, to_list/1, in/2, in/3, + out/1, join/2]). + +%%---------------------------------------------------------------------------- + +-ifdef(use_specs). + +-export_type([q/0]). + +-type(q() :: pqueue()). +-type(priority() :: integer() | 'infinity'). +-type(squeue() :: {queue, [any()], [any()]}). +-type(pqueue() :: squeue() | {pqueue, [{priority(), squeue()}]}). + +-spec(new/0 :: () -> pqueue()). +-spec(is_queue/1 :: (any()) -> boolean()). +-spec(is_empty/1 :: (pqueue()) -> boolean()). +-spec(len/1 :: (pqueue()) -> non_neg_integer()). +-spec(to_list/1 :: (pqueue()) -> [{priority(), any()}]). +-spec(in/2 :: (any(), pqueue()) -> pqueue()). +-spec(in/3 :: (any(), priority(), pqueue()) -> pqueue()). +-spec(out/1 :: (pqueue()) -> {empty | {value, any()}, pqueue()}). +-spec(join/2 :: (pqueue(), pqueue()) -> pqueue()). + +-endif. + +%%---------------------------------------------------------------------------- + +new() -> + {queue, [], []}. + +is_queue({queue, R, F}) when is_list(R), is_list(F) -> + true; +is_queue({pqueue, Queues}) when is_list(Queues) -> + lists:all(fun ({infinity, Q}) -> is_queue(Q); + ({P, Q}) -> is_integer(P) andalso is_queue(Q) + end, Queues); +is_queue(_) -> + false. + +is_empty({queue, [], []}) -> + true; +is_empty(_) -> + false. + +len({queue, R, F}) when is_list(R), is_list(F) -> + length(R) + length(F); +len({pqueue, Queues}) -> + lists:sum([len(Q) || {_, Q} <- Queues]). + +to_list({queue, In, Out}) when is_list(In), is_list(Out) -> + [{0, V} || V <- Out ++ lists:reverse(In, [])]; +to_list({pqueue, Queues}) -> + [{maybe_negate_priority(P), V} || {P, Q} <- Queues, + {0, V} <- to_list(Q)]. + +in(Item, Q) -> + in(Item, 0, Q). + +in(X, 0, {queue, [_] = In, []}) -> + {queue, [X], In}; +in(X, 0, {queue, In, Out}) when is_list(In), is_list(Out) -> + {queue, [X|In], Out}; +in(X, Priority, _Q = {queue, [], []}) -> + in(X, Priority, {pqueue, []}); +in(X, Priority, Q = {queue, _, _}) -> + in(X, Priority, {pqueue, [{0, Q}]}); +in(X, Priority, {pqueue, Queues}) -> + P = maybe_negate_priority(Priority), + {pqueue, case lists:keysearch(P, 1, Queues) of + {value, {_, Q}} -> + lists:keyreplace(P, 1, Queues, {P, in(X, Q)}); + false when P == infinity -> + [{P, {queue, [X], []}} | Queues]; + false -> + case Queues of + [{infinity, InfQueue} | Queues1] -> + [{infinity, InfQueue} | + lists:keysort(1, [{P, {queue, [X], []}} | Queues1])]; + _ -> + lists:keysort(1, [{P, {queue, [X], []}} | Queues]) + end + end}. + +out({queue, [], []} = Q) -> + {empty, Q}; +out({queue, [V], []}) -> + {{value, V}, {queue, [], []}}; +out({queue, [Y|In], []}) -> + [V|Out] = lists:reverse(In, []), + {{value, V}, {queue, [Y], Out}}; +out({queue, In, [V]}) when is_list(In) -> + {{value,V}, r2f(In)}; +out({queue, In,[V|Out]}) when is_list(In) -> + {{value, V}, {queue, In, Out}}; +out({pqueue, [{P, Q} | Queues]}) -> + {R, Q1} = out(Q), + NewQ = case is_empty(Q1) of + true -> case Queues of + [] -> {queue, [], []}; + [{0, OnlyQ}] -> OnlyQ; + [_|_] -> {pqueue, Queues} + end; + false -> {pqueue, [{P, Q1} | Queues]} + end, + {R, NewQ}. + +join(A, {queue, [], []}) -> + A; +join({queue, [], []}, B) -> + B; +join({queue, AIn, AOut}, {queue, BIn, BOut}) -> + {queue, BIn, AOut ++ lists:reverse(AIn, BOut)}; +join(A = {queue, _, _}, {pqueue, BPQ}) -> + {Pre, Post} = + lists:splitwith(fun ({P, _}) -> P < 0 orelse P == infinity end, BPQ), + Post1 = case Post of + [] -> [ {0, A} ]; + [ {0, ZeroQueue} | Rest ] -> [ {0, join(A, ZeroQueue)} | Rest ]; + _ -> [ {0, A} | Post ] + end, + {pqueue, Pre ++ Post1}; +join({pqueue, APQ}, B = {queue, _, _}) -> + {Pre, Post} = + lists:splitwith(fun ({P, _}) -> P < 0 orelse P == infinity end, APQ), + Post1 = case Post of + [] -> [ {0, B} ]; + [ {0, ZeroQueue} | Rest ] -> [ {0, join(ZeroQueue, B)} | Rest ]; + _ -> [ {0, B} | Post ] + end, + {pqueue, Pre ++ Post1}; +join({pqueue, APQ}, {pqueue, BPQ}) -> + {pqueue, merge(APQ, BPQ, [])}. + +merge([], BPQ, Acc) -> + lists:reverse(Acc, BPQ); +merge(APQ, [], Acc) -> + lists:reverse(Acc, APQ); +merge([{P, A}|As], [{P, B}|Bs], Acc) -> + merge(As, Bs, [ {P, join(A, B)} | Acc ]); +merge([{PA, A}|As], Bs = [{PB, _}|_], Acc) when PA < PB orelse PA == infinity -> + merge(As, Bs, [ {PA, A} | Acc ]); +merge(As = [{_, _}|_], [{PB, B}|Bs], Acc) -> + merge(As, Bs, [ {PB, B} | Acc ]). + +r2f([]) -> {queue, [], []}; +r2f([_] = R) -> {queue, [], R}; +r2f([X,Y]) -> {queue, [X], [Y]}; +r2f([X,Y|R]) -> {queue, [X,Y], lists:reverse(R, [])}. + +maybe_negate_priority(infinity) -> infinity; +maybe_negate_priority(P) -> -P. diff --git a/src/supervisor2.erl b/src/supervisor2.erl new file mode 100644 index 000000000..5af38573f --- /dev/null +++ b/src/supervisor2.erl @@ -0,0 +1,1232 @@ +%% This file is a copy of supervisor.erl from the R13B-3 Erlang/OTP +%% distribution, with the following modifications: +%% +%% 1) the module name is supervisor2 +%% +%% 2) there is a new strategy called +%% simple_one_for_one_terminate. This is exactly the same as for +%% simple_one_for_one, except that children *are* explicitly +%% terminated as per the shutdown component of the child_spec. +%% +%% 3) child specifications can contain, as the restart type, a tuple +%% {permanent, Delay} | {transient, Delay} | {intrinsic, Delay} +%% where Delay >= 0 (see point (4) below for intrinsic). The delay, +%% in seconds, indicates what should happen if a child, upon being +%% restarted, exceeds the MaxT and MaxR parameters. Thus, if a +%% child exits, it is restarted as normal. If it exits sufficiently +%% quickly and often to exceed the boundaries set by the MaxT and +%% MaxR parameters, and a Delay is specified, then rather than +%% stopping the supervisor, the supervisor instead continues and +%% tries to start up the child again, Delay seconds later. +%% +%% Note that you can never restart more frequently than the MaxT +%% and MaxR parameters allow: i.e. you must wait until *both* the +%% Delay has passed *and* the MaxT and MaxR parameters allow the +%% child to be restarted. +%% +%% Also note that the Delay is a *minimum*. There is no guarantee +%% that the child will be restarted within that time, especially if +%% other processes are dying and being restarted at the same time - +%% essentially we have to wait for the delay to have passed and for +%% the MaxT and MaxR parameters to permit the child to be +%% restarted. This may require waiting for longer than Delay. +%% +%% Sometimes, you may wish for a transient or intrinsic child to +%% exit abnormally so that it gets restarted, but still log +%% nothing. gen_server will log any exit reason other than +%% 'normal', 'shutdown' or {'shutdown', _}. Thus the exit reason of +%% {'shutdown', 'restart'} is interpreted to mean you wish the +%% child to be restarted according to the delay parameters, but +%% gen_server will not log the error. Thus from gen_server's +%% perspective it's a normal exit, whilst from supervisor's +%% perspective, it's an abnormal exit. +%% +%% 4) Added an 'intrinsic' restart type. Like the transient type, this +%% type means the child should only be restarted if the child exits +%% abnormally. Unlike the transient type, if the child exits +%% normally, the supervisor itself also exits normally. If the +%% child is a supervisor and it exits normally (i.e. with reason of +%% 'shutdown') then the child's parent also exits normally. +%% +%% 5) normal, and {shutdown, _} exit reasons are all treated the same +%% (i.e. are regarded as normal exits) +%% +%% All modifications are (C) 2010-2012 VMware, Inc. +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1996-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +-module(supervisor2). + +-behaviour(gen_server). + +%% External exports +-export([start_link/2,start_link/3, + start_child/2, restart_child/2, + delete_child/2, terminate_child/2, + which_children/1, find_child/2, + check_childspecs/1]). + +%% Internal exports +-export([init/1, handle_call/3, handle_info/2, terminate/2, code_change/3]). +-export([handle_cast/2]). + +-define(DICT, dict). + +-record(state, {name, + strategy, + children = [], + dynamics = ?DICT:new(), + intensity, + period, + restarts = [], + module, + args}). + +-record(child, {pid = undefined, % pid is undefined when child is not running + name, + mfa, + restart_type, + shutdown, + child_type, + modules = []}). + +-define(is_simple(State), State#state.strategy =:= simple_one_for_one orelse + State#state.strategy =:= simple_one_for_one_terminate). +-define(is_terminate_simple(State), + State#state.strategy =:= simple_one_for_one_terminate). + +-ifdef(use_specs). + +%%-------------------------------------------------------------------------- +%% Types +%%-------------------------------------------------------------------------- + +-export_type([child_spec/0, startchild_ret/0, strategy/0, sup_name/0]). + +-type child() :: 'undefined' | pid(). +-type child_id() :: term(). +-type mfargs() :: {M :: module(), F :: atom(), A :: [term()] | undefined}. +-type modules() :: [module()] | 'dynamic'. +-type delay() :: non_neg_integer(). +-type restart() :: 'permanent' | 'transient' | 'temporary' | 'intrinsic' + | {'permanent', delay()} | {'transient', delay()} + | {'intrinsic', delay()}. +-type shutdown() :: 'brutal_kill' | timeout(). +-type worker() :: 'worker' | 'supervisor'. +-type sup_name() :: {'local', Name :: atom()} | {'global', Name :: atom()}. +-type sup_ref() :: (Name :: atom()) + | {Name :: atom(), Node :: node()} + | {'global', Name :: atom()} + | pid(). +-type child_spec() :: {Id :: child_id(), + StartFunc :: mfargs(), + Restart :: restart(), + Shutdown :: shutdown(), + Type :: worker(), + Modules :: modules()}. + + +-type strategy() :: 'one_for_all' | 'one_for_one' + | 'rest_for_one' | 'simple_one_for_one' + | 'simple_one_for_one_terminate'. + +-type child_rec() :: #child{pid :: child() | {restarting,pid()} | [pid()], + name :: child_id(), + mfa :: mfargs(), + restart_type :: restart(), + shutdown :: shutdown(), + child_type :: worker(), + modules :: modules()}. + +-type state() :: #state{strategy :: strategy(), + children :: [child_rec()], + dynamics :: ?DICT(), + intensity :: non_neg_integer(), + period :: pos_integer()}. + +%%-------------------------------------------------------------------------- +%% Callback behaviour +%%-------------------------------------------------------------------------- + +-callback init(Args :: term()) -> + {ok, {{RestartStrategy :: strategy(), + MaxR :: non_neg_integer(), + MaxT :: non_neg_integer()}, + [ChildSpec :: child_spec()]}} + | ignore. + +%%-------------------------------------------------------------------------- +%% Specs +%%-------------------------------------------------------------------------- + +-type startchild_err() :: 'already_present' + | {'already_started', Child :: child()} | term(). +-type startchild_ret() :: {'ok', Child :: child()} + | {'ok', Child :: child(), Info :: term()} + | {'error', startchild_err()}. + +-spec start_child(SupRef, ChildSpec) -> startchild_ret() when + SupRef :: sup_ref(), + ChildSpec :: child_spec() | (List :: [term()]). + +-spec restart_child(SupRef, Id) -> Result when + SupRef :: sup_ref(), + Id :: child_id(), + Result :: {'ok', Child :: child()} + | {'ok', Child :: child(), Info :: term()} + | {'error', Error}, + Error :: 'running' | 'not_found' | 'simple_one_for_one' | term(). + +-spec delete_child(SupRef, Id) -> Result when + SupRef :: sup_ref(), + Id :: child_id(), + Result :: 'ok' | {'error', Error}, + Error :: 'running' | 'not_found' | 'simple_one_for_one'. + +-spec terminate_child(SupRef, Id) -> Result when + SupRef :: sup_ref(), + Id :: pid() | child_id(), + Result :: 'ok' | {'error', Error}, + Error :: 'not_found' | 'simple_one_for_one'. + +-spec which_children(SupRef) -> [{Id,Child,Type,Modules}] when + SupRef :: sup_ref(), + Id :: child_id() | 'undefined', + Child :: child(), + Type :: worker(), + Modules :: modules(). + +-spec check_childspecs(ChildSpecs) -> Result when + ChildSpecs :: [child_spec()], + Result :: 'ok' | {'error', Error :: term()}. + +-type init_sup_name() :: sup_name() | 'self'. + +-type stop_rsn() :: 'shutdown' | {'bad_return', {module(),'init', term()}} + | {'bad_start_spec', term()} | {'start_spec', term()} + | {'supervisor_data', term()}. + +-spec init({init_sup_name(), module(), [term()]}) -> + {'ok', state()} | 'ignore' | {'stop', stop_rsn()}. + +-type call() :: 'which_children'. +-spec handle_call(call(), term(), state()) -> {'reply', term(), state()}. + +-spec handle_cast('null', state()) -> {'noreply', state()}. + +-spec handle_info(term(), state()) -> + {'noreply', state()} | {'stop', 'shutdown', state()}. + +-spec terminate(term(), state()) -> 'ok'. + +-spec code_change(term(), state(), term()) -> + {'ok', state()} | {'error', term()}. + +-else. + +-export([behaviour_info/1]). + +behaviour_info(callbacks) -> + [{init,1}]; +behaviour_info(_Other) -> + undefined. + +-endif. + +%%% --------------------------------------------------- +%%% This is a general process supervisor built upon gen_server.erl. +%%% Servers/processes should/could also be built using gen_server.erl. +%%% SupName = {local, atom()} | {global, atom()}. +%%% --------------------------------------------------- +start_link(Mod, Args) -> + gen_server:start_link(?MODULE, {self, Mod, Args}, []). + +start_link(SupName, Mod, Args) -> + gen_server:start_link(SupName, ?MODULE, {SupName, Mod, Args}, []). + +%%% --------------------------------------------------- +%%% Interface functions. +%%% --------------------------------------------------- +start_child(Supervisor, ChildSpec) -> + call(Supervisor, {start_child, ChildSpec}). + +restart_child(Supervisor, Name) -> + call(Supervisor, {restart_child, Name}). + +delete_child(Supervisor, Name) -> + call(Supervisor, {delete_child, Name}). + +%%----------------------------------------------------------------- +%% Func: terminate_child/2 +%% Returns: ok | {error, Reason} +%% Note that the child is *always* terminated in some +%% way (maybe killed). +%%----------------------------------------------------------------- +terminate_child(Supervisor, Name) -> + call(Supervisor, {terminate_child, Name}). + +which_children(Supervisor) -> + call(Supervisor, which_children). + +find_child(Supervisor, Name) -> + [Pid || {Name1, Pid, _Type, _Modules} <- which_children(Supervisor), + Name1 =:= Name]. + +call(Supervisor, Req) -> + gen_server:call(Supervisor, Req, infinity). + +check_childspecs(ChildSpecs) when is_list(ChildSpecs) -> + case check_startspec(ChildSpecs) of + {ok, _} -> ok; + Error -> {error, Error} + end; +check_childspecs(X) -> {error, {badarg, X}}. + +%%% --------------------------------------------------- +%%% +%%% Initialize the supervisor. +%%% +%%% --------------------------------------------------- +init({SupName, Mod, Args}) -> + process_flag(trap_exit, true), + case Mod:init(Args) of + {ok, {SupFlags, StartSpec}} -> + case init_state(SupName, SupFlags, Mod, Args) of + {ok, State} when ?is_simple(State) -> + init_dynamic(State, StartSpec); + {ok, State} -> + init_children(State, StartSpec); + Error -> + {stop, {supervisor_data, Error}} + end; + ignore -> + ignore; + Error -> + {stop, {bad_return, {Mod, init, Error}}} + end. + +init_children(State, StartSpec) -> + SupName = State#state.name, + case check_startspec(StartSpec) of + {ok, Children} -> + case start_children(Children, SupName) of + {ok, NChildren} -> + {ok, State#state{children = NChildren}}; + {error, NChildren} -> + terminate_children(NChildren, SupName), + {stop, shutdown} + end; + Error -> + {stop, {start_spec, Error}} + end. + +init_dynamic(State, [StartSpec]) -> + case check_startspec([StartSpec]) of + {ok, Children} -> + {ok, State#state{children = Children}}; + Error -> + {stop, {start_spec, Error}} + end; +init_dynamic(_State, StartSpec) -> + {stop, {bad_start_spec, StartSpec}}. + +%%----------------------------------------------------------------- +%% Func: start_children/2 +%% Args: Children = [#child] in start order +%% SupName = {local, atom()} | {global, atom()} | {pid(),Mod} +%% Purpose: Start all children. The new list contains #child's +%% with pids. +%% Returns: {ok, NChildren} | {error, NChildren} +%% NChildren = [#child] in termination order (reversed +%% start order) +%%----------------------------------------------------------------- +start_children(Children, SupName) -> start_children(Children, [], SupName). + +start_children([Child|Chs], NChildren, SupName) -> + case do_start_child(SupName, Child) of + {ok, Pid} -> + start_children(Chs, [Child#child{pid = Pid}|NChildren], SupName); + {ok, Pid, _Extra} -> + start_children(Chs, [Child#child{pid = Pid}|NChildren], SupName); + {error, Reason} -> + report_error(start_error, Reason, Child, SupName), + {error, lists:reverse(Chs) ++ [Child | NChildren]} + end; +start_children([], NChildren, _SupName) -> + {ok, NChildren}. + +do_start_child(SupName, Child) -> + #child{mfa = {M, F, A}} = Child, + case catch apply(M, F, A) of + {ok, Pid} when is_pid(Pid) -> + NChild = Child#child{pid = Pid}, + report_progress(NChild, SupName), + {ok, Pid}; + {ok, Pid, Extra} when is_pid(Pid) -> + NChild = Child#child{pid = Pid}, + report_progress(NChild, SupName), + {ok, Pid, Extra}; + ignore -> + {ok, undefined}; + {error, What} -> {error, What}; + What -> {error, What} + end. + +do_start_child_i(M, F, A) -> + case catch apply(M, F, A) of + {ok, Pid} when is_pid(Pid) -> + {ok, Pid}; + {ok, Pid, Extra} when is_pid(Pid) -> + {ok, Pid, Extra}; + ignore -> + {ok, undefined}; + {error, Error} -> + {error, Error}; + What -> + {error, What} + end. + + +%%% --------------------------------------------------- +%%% +%%% Callback functions. +%%% +%%% --------------------------------------------------- +handle_call({start_child, EArgs}, _From, State) when ?is_simple(State) -> + #child{mfa = {M, F, A}} = hd(State#state.children), + Args = A ++ EArgs, + case do_start_child_i(M, F, Args) of + {ok, undefined} -> + {reply, {ok, undefined}, State}; + {ok, Pid} -> + NState = State#state{dynamics = + ?DICT:store(Pid, Args, State#state.dynamics)}, + {reply, {ok, Pid}, NState}; + {ok, Pid, Extra} -> + NState = State#state{dynamics = + ?DICT:store(Pid, Args, State#state.dynamics)}, + {reply, {ok, Pid, Extra}, NState}; + What -> + {reply, What, State} + end; + +%%% The requests terminate_child, delete_child and restart_child are +%%% invalid for simple_one_for_one and simple_one_for_one_terminate +%%% supervisors. +handle_call({_Req, _Data}, _From, State) when ?is_simple(State) -> + {reply, {error, State#state.strategy}, State}; + +handle_call({start_child, ChildSpec}, _From, State) -> + case check_childspec(ChildSpec) of + {ok, Child} -> + {Resp, NState} = handle_start_child(Child, State), + {reply, Resp, NState}; + What -> + {reply, {error, What}, State} + end; + +handle_call({restart_child, Name}, _From, State) -> + case get_child(Name, State) of + {value, Child} when Child#child.pid =:= undefined -> + case do_start_child(State#state.name, Child) of + {ok, Pid} -> + NState = replace_child(Child#child{pid = Pid}, State), + {reply, {ok, Pid}, NState}; + {ok, Pid, Extra} -> + NState = replace_child(Child#child{pid = Pid}, State), + {reply, {ok, Pid, Extra}, NState}; + Error -> + {reply, Error, State} + end; + {value, _} -> + {reply, {error, running}, State}; + _ -> + {reply, {error, not_found}, State} + end; + +handle_call({delete_child, Name}, _From, State) -> + case get_child(Name, State) of + {value, Child} when Child#child.pid =:= undefined -> + NState = remove_child(Child, State), + {reply, ok, NState}; + {value, _} -> + {reply, {error, running}, State}; + _ -> + {reply, {error, not_found}, State} + end; + +handle_call({terminate_child, Name}, _From, State) -> + case get_child(Name, State) of + {value, Child} -> + NChild = do_terminate(Child, State#state.name), + {reply, ok, replace_child(NChild, State)}; + _ -> + {reply, {error, not_found}, State} + end; + +handle_call(which_children, _From, State) when ?is_simple(State) -> + [#child{child_type = CT, modules = Mods}] = State#state.children, + Reply = lists:map(fun ({Pid, _}) -> {undefined, Pid, CT, Mods} end, + ?DICT:to_list(State#state.dynamics)), + {reply, Reply, State}; + +handle_call(which_children, _From, State) -> + Resp = + lists:map(fun (#child{pid = Pid, name = Name, + child_type = ChildType, modules = Mods}) -> + {Name, Pid, ChildType, Mods} + end, + State#state.children), + {reply, Resp, State}. + +%%% Hopefully cause a function-clause as there is no API function +%%% that utilizes cast. +handle_cast(null, State) -> + error_logger:error_msg("ERROR: Supervisor received cast-message 'null'~n", + []), + + {noreply, State}. + +handle_info({delayed_restart, {RestartType, Reason, Child}}, State) + when ?is_simple(State) -> + {ok, NState} = do_restart(RestartType, Reason, Child, State), + {noreply, NState}; +handle_info({delayed_restart, {RestartType, Reason, Child}}, State) -> + case get_child(Child#child.name, State) of + {value, Child1} -> + {ok, NState} = do_restart(RestartType, Reason, Child1, State), + {noreply, NState}; + _ -> + {noreply, State} + end; + +%% +%% Take care of terminated children. +%% +handle_info({'EXIT', Pid, Reason}, State) -> + case restart_child(Pid, Reason, State) of + {ok, State1} -> + {noreply, State1}; + {shutdown, State1} -> + {stop, shutdown, State1} + end; + +handle_info(Msg, State) -> + error_logger:error_msg("Supervisor received unexpected message: ~p~n", + [Msg]), + {noreply, State}. +%% +%% Terminate this server. +%% +terminate(_Reason, State) when ?is_terminate_simple(State) -> + terminate_simple_children( + hd(State#state.children), State#state.dynamics, State#state.name), + ok; +terminate(_Reason, State) -> + terminate_children(State#state.children, State#state.name), + ok. + +%% +%% Change code for the supervisor. +%% Call the new call-back module and fetch the new start specification. +%% Combine the new spec. with the old. If the new start spec. is +%% not valid the code change will not succeed. +%% Use the old Args as argument to Module:init/1. +%% NOTE: This requires that the init function of the call-back module +%% does not have any side effects. +%% +code_change(_, State, _) -> + case (State#state.module):init(State#state.args) of + {ok, {SupFlags, StartSpec}} -> + case catch check_flags(SupFlags) of + ok -> + {Strategy, MaxIntensity, Period} = SupFlags, + update_childspec(State#state{strategy = Strategy, + intensity = MaxIntensity, + period = Period}, + StartSpec); + Error -> + {error, Error} + end; + ignore -> + {ok, State}; + Error -> + Error + end. + +check_flags({Strategy, MaxIntensity, Period}) -> + validStrategy(Strategy), + validIntensity(MaxIntensity), + validPeriod(Period), + ok; +check_flags(What) -> + {bad_flags, What}. + +update_childspec(State, StartSpec) when ?is_simple(State) -> + case check_startspec(StartSpec) of + {ok, [Child]} -> + {ok, State#state{children = [Child]}}; + Error -> + {error, Error} + end; + +update_childspec(State, StartSpec) -> + case check_startspec(StartSpec) of + {ok, Children} -> + OldC = State#state.children, % In reverse start order ! + NewC = update_childspec1(OldC, Children, []), + {ok, State#state{children = NewC}}; + Error -> + {error, Error} + end. + +update_childspec1([Child|OldC], Children, KeepOld) -> + case update_chsp(Child, Children) of + {ok,NewChildren} -> + update_childspec1(OldC, NewChildren, KeepOld); + false -> + update_childspec1(OldC, Children, [Child|KeepOld]) + end; +update_childspec1([], Children, KeepOld) -> + % Return them in (keeped) reverse start order. + lists:reverse(Children ++ KeepOld). + +update_chsp(OldCh, Children) -> + case lists:map(fun (Ch) when OldCh#child.name =:= Ch#child.name -> + Ch#child{pid = OldCh#child.pid}; + (Ch) -> + Ch + end, + Children) of + Children -> + false; % OldCh not found in new spec. + NewC -> + {ok, NewC} + end. + +%%% --------------------------------------------------- +%%% Start a new child. +%%% --------------------------------------------------- + +handle_start_child(Child, State) -> + case get_child(Child#child.name, State) of + false -> + case do_start_child(State#state.name, Child) of + {ok, Pid} -> + Children = State#state.children, + {{ok, Pid}, + State#state{children = + [Child#child{pid = Pid}|Children]}}; + {ok, Pid, Extra} -> + Children = State#state.children, + {{ok, Pid, Extra}, + State#state{children = + [Child#child{pid = Pid}|Children]}}; + {error, What} -> + {{error, {What, Child}}, State} + end; + {value, OldChild} when OldChild#child.pid =/= undefined -> + {{error, {already_started, OldChild#child.pid}}, State}; + {value, _OldChild} -> + {{error, already_present}, State} + end. + +%%% --------------------------------------------------- +%%% Restart. A process has terminated. +%%% Returns: {ok, #state} | {shutdown, #state} +%%% --------------------------------------------------- + +restart_child(Pid, Reason, State) when ?is_simple(State) -> + case ?DICT:find(Pid, State#state.dynamics) of + {ok, Args} -> + [Child] = State#state.children, + RestartType = Child#child.restart_type, + {M, F, _} = Child#child.mfa, + NChild = Child#child{pid = Pid, mfa = {M, F, Args}}, + do_restart(RestartType, Reason, NChild, State); + error -> + {ok, State} + end; +restart_child(Pid, Reason, State) -> + Children = State#state.children, + case lists:keysearch(Pid, #child.pid, Children) of + {value, Child} -> + RestartType = Child#child.restart_type, + do_restart(RestartType, Reason, Child, State); + _ -> + {ok, State} + end. + +do_restart({permanent = RestartType, Delay}, Reason, Child, State) -> + do_restart_delay({RestartType, Delay}, Reason, Child, State); +do_restart(permanent, Reason, Child, State) -> + report_error(child_terminated, Reason, Child, State#state.name), + restart(Child, State); +do_restart(Type, normal, Child, State) -> + del_child_and_maybe_shutdown(Type, Child, State); +do_restart({RestartType, Delay}, {shutdown, restart} = Reason, Child, State) + when RestartType =:= transient orelse RestartType =:= intrinsic -> + do_restart_delay({RestartType, Delay}, Reason, Child, State); +do_restart(Type, {shutdown, _}, Child, State) -> + del_child_and_maybe_shutdown(Type, Child, State); +do_restart(Type, shutdown, Child = #child{child_type = supervisor}, State) -> + del_child_and_maybe_shutdown(Type, Child, State); +do_restart({RestartType, Delay}, Reason, Child, State) + when RestartType =:= transient orelse RestartType =:= intrinsic -> + do_restart_delay({RestartType, Delay}, Reason, Child, State); +do_restart(Type, Reason, Child, State) when Type =:= transient orelse + Type =:= intrinsic -> + report_error(child_terminated, Reason, Child, State#state.name), + restart(Child, State); +do_restart(temporary, Reason, Child, State) -> + report_error(child_terminated, Reason, Child, State#state.name), + NState = state_del_child(Child, State), + {ok, NState}. + +do_restart_delay({RestartType, Delay}, Reason, Child, State) -> + case restart1(Child, State) of + {ok, NState} -> + {ok, NState}; + {terminate, NState} -> + _TRef = erlang:send_after(trunc(Delay*1000), self(), + {delayed_restart, + {{RestartType, Delay}, Reason, Child}}), + {ok, state_del_child(Child, NState)} + end. + +del_child_and_maybe_shutdown(intrinsic, Child, State) -> + {shutdown, state_del_child(Child, State)}; +del_child_and_maybe_shutdown({intrinsic, _Delay}, Child, State) -> + {shutdown, state_del_child(Child, State)}; +del_child_and_maybe_shutdown(_, Child, State) -> + {ok, state_del_child(Child, State)}. + +restart(Child, State) -> + case add_restart(State) of + {ok, NState} -> + restart(NState#state.strategy, Child, NState, fun restart/2); + {terminate, NState} -> + report_error(shutdown, reached_max_restart_intensity, + Child, State#state.name), + {shutdown, state_del_child(Child, NState)} + end. + +restart1(Child, State) -> + case add_restart(State) of + {ok, NState} -> + restart(NState#state.strategy, Child, NState, fun restart1/2); + {terminate, _NState} -> + %% we've reached the max restart intensity, but the + %% add_restart will have added to the restarts + %% field. Given we don't want to die here, we need to go + %% back to the old restarts field otherwise we'll never + %% attempt to restart later. + {terminate, State} + end. + +restart(Strategy, Child, State, Restart) + when Strategy =:= simple_one_for_one orelse + Strategy =:= simple_one_for_one_terminate -> + #child{mfa = {M, F, A}} = Child, + Dynamics = ?DICT:erase(Child#child.pid, State#state.dynamics), + case do_start_child_i(M, F, A) of + {ok, undefined} -> + {ok, State}; + {ok, Pid} -> + NState = State#state{dynamics = ?DICT:store(Pid, A, Dynamics)}, + {ok, NState}; + {ok, Pid, _Extra} -> + NState = State#state{dynamics = ?DICT:store(Pid, A, Dynamics)}, + {ok, NState}; + {error, Error} -> + report_error(start_error, Error, Child, State#state.name), + Restart(Child, State) + end; +restart(one_for_one, Child, State, Restart) -> + case do_start_child(State#state.name, Child) of + {ok, Pid} -> + NState = replace_child(Child#child{pid = Pid}, State), + {ok, NState}; + {ok, Pid, _Extra} -> + NState = replace_child(Child#child{pid = Pid}, State), + {ok, NState}; + {error, Reason} -> + report_error(start_error, Reason, Child, State#state.name), + Restart(Child, State) + end; +restart(rest_for_one, Child, State, Restart) -> + {ChAfter, ChBefore} = split_child(Child#child.pid, State#state.children), + ChAfter2 = terminate_children(ChAfter, State#state.name), + case start_children(ChAfter2, State#state.name) of + {ok, ChAfter3} -> + {ok, State#state{children = ChAfter3 ++ ChBefore}}; + {error, ChAfter3} -> + Restart(Child, State#state{children = ChAfter3 ++ ChBefore}) + end; +restart(one_for_all, Child, State, Restart) -> + Children1 = del_child(Child#child.pid, State#state.children), + Children2 = terminate_children(Children1, State#state.name), + case start_children(Children2, State#state.name) of + {ok, NChs} -> + {ok, State#state{children = NChs}}; + {error, NChs} -> + Restart(Child, State#state{children = NChs}) + end. + +%%----------------------------------------------------------------- +%% Func: terminate_children/2 +%% Args: Children = [#child] in termination order +%% SupName = {local, atom()} | {global, atom()} | {pid(),Mod} +%% Returns: NChildren = [#child] in +%% startup order (reversed termination order) +%%----------------------------------------------------------------- +terminate_children(Children, SupName) -> + terminate_children(Children, SupName, []). + +terminate_children([Child | Children], SupName, Res) -> + NChild = do_terminate(Child, SupName), + terminate_children(Children, SupName, [NChild | Res]); +terminate_children([], _SupName, Res) -> + Res. + +terminate_simple_children(Child, Dynamics, SupName) -> + Pids = dict:fold(fun (Pid, _Args, Pids) -> + erlang:monitor(process, Pid), + unlink(Pid), + exit(Pid, child_exit_reason(Child)), + [Pid | Pids] + end, [], Dynamics), + TimeoutMsg = {timeout, make_ref()}, + TRef = timeout_start(Child, TimeoutMsg), + {Replies, Timedout} = + lists:foldl( + fun (_Pid, {Replies, Timedout}) -> + {Pid1, Reason1, Timedout1} = + receive + TimeoutMsg -> + Remaining = Pids -- [P || {P, _} <- Replies], + [exit(P, kill) || P <- Remaining], + receive + {'DOWN', _MRef, process, Pid, Reason} -> + {Pid, Reason, true} + end; + {'DOWN', _MRef, process, Pid, Reason} -> + {Pid, Reason, Timedout} + end, + {[{Pid1, child_res(Child, Reason1, Timedout1)} | Replies], + Timedout1} + end, {[], false}, Pids), + timeout_stop(Child, TRef, TimeoutMsg, Timedout), + ReportError = shutdown_error_reporter(SupName), + Report = fun(_, ok) -> ok; + (Pid, {error, R}) -> ReportError(R, Child#child{pid = Pid}) + end, + [receive + {'EXIT', Pid, Reason} -> + Report(Pid, child_res(Child, Reason, Timedout)) + after + 0 -> Report(Pid, Reply) + end || {Pid, Reply} <- Replies], + ok. + +child_exit_reason(#child{shutdown = brutal_kill}) -> kill; +child_exit_reason(#child{}) -> shutdown. + +child_res(#child{shutdown=brutal_kill}, killed, false) -> ok; +child_res(#child{}, shutdown, false) -> ok; +child_res(#child{restart_type=permanent}, normal, false) -> {error, normal}; +child_res(#child{restart_type={permanent,_}},normal, false) -> {error, normal}; +child_res(#child{}, normal, false) -> ok; +child_res(#child{}, R, _) -> {error, R}. + +timeout_start(#child{shutdown = Time}, Msg) when is_integer(Time) -> + erlang:send_after(Time, self(), Msg); +timeout_start(#child{}, _Msg) -> + ok. + +timeout_stop(#child{shutdown = Time}, TRef, Msg, false) when is_integer(Time) -> + erlang:cancel_timer(TRef), + receive + Msg -> ok + after + 0 -> ok + end; +timeout_stop(#child{}, _TRef, _Msg, _Timedout) -> + ok. + +do_terminate(Child, SupName) when Child#child.pid =/= undefined -> + ReportError = shutdown_error_reporter(SupName), + case shutdown(Child#child.pid, Child#child.shutdown) of + ok -> + ok; + {error, normal} -> + case Child#child.restart_type of + permanent -> ReportError(normal, Child); + {permanent, _Delay} -> ReportError(normal, Child); + _ -> ok + end; + {error, OtherReason} -> + ReportError(OtherReason, Child) + end, + Child#child{pid = undefined}; +do_terminate(Child, _SupName) -> + Child. + +%%----------------------------------------------------------------- +%% Shutdowns a child. We must check the EXIT value +%% of the child, because it might have died with another reason than +%% the wanted. In that case we want to report the error. We put a +%% monitor on the child an check for the 'DOWN' message instead of +%% checking for the 'EXIT' message, because if we check the 'EXIT' +%% message a "naughty" child, who does unlink(Sup), could hang the +%% supervisor. +%% Returns: ok | {error, OtherReason} (this should be reported) +%%----------------------------------------------------------------- +shutdown(Pid, brutal_kill) -> + + case monitor_child(Pid) of + ok -> + exit(Pid, kill), + receive + {'DOWN', _MRef, process, Pid, killed} -> + ok; + {'DOWN', _MRef, process, Pid, OtherReason} -> + {error, OtherReason} + end; + {error, Reason} -> + {error, Reason} + end; + +shutdown(Pid, Time) -> + + case monitor_child(Pid) of + ok -> + exit(Pid, shutdown), %% Try to shutdown gracefully + receive + {'DOWN', _MRef, process, Pid, shutdown} -> + ok; + {'DOWN', _MRef, process, Pid, OtherReason} -> + {error, OtherReason} + after Time -> + exit(Pid, kill), %% Force termination. + receive + {'DOWN', _MRef, process, Pid, OtherReason} -> + {error, OtherReason} + end + end; + {error, Reason} -> + {error, Reason} + end. + +%% Help function to shutdown/2 switches from link to monitor approach +monitor_child(Pid) -> + + %% Do the monitor operation first so that if the child dies + %% before the monitoring is done causing a 'DOWN'-message with + %% reason noproc, we will get the real reason in the 'EXIT'-message + %% unless a naughty child has already done unlink... + erlang:monitor(process, Pid), + unlink(Pid), + + receive + %% If the child dies before the unlik we must empty + %% the mail-box of the 'EXIT'-message and the 'DOWN'-message. + {'EXIT', Pid, Reason} -> + receive + {'DOWN', _, process, Pid, _} -> + {error, Reason} + end + after 0 -> + %% If a naughty child did unlink and the child dies before + %% monitor the result will be that shutdown/2 receives a + %% 'DOWN'-message with reason noproc. + %% If the child should die after the unlink there + %% will be a 'DOWN'-message with a correct reason + %% that will be handled in shutdown/2. + ok + end. + + +%%----------------------------------------------------------------- +%% Child/State manipulating functions. +%%----------------------------------------------------------------- +state_del_child(#child{pid = Pid}, State) when ?is_simple(State) -> + NDynamics = ?DICT:erase(Pid, State#state.dynamics), + State#state{dynamics = NDynamics}; +state_del_child(Child, State) -> + NChildren = del_child(Child#child.name, State#state.children), + State#state{children = NChildren}. + +del_child(Name, [Ch|Chs]) when Ch#child.name =:= Name -> + [Ch#child{pid = undefined} | Chs]; +del_child(Pid, [Ch|Chs]) when Ch#child.pid =:= Pid -> + [Ch#child{pid = undefined} | Chs]; +del_child(Name, [Ch|Chs]) -> + [Ch|del_child(Name, Chs)]; +del_child(_, []) -> + []. + +%% Chs = [S4, S3, Ch, S1, S0] +%% Ret: {[S4, S3, Ch], [S1, S0]} +split_child(Name, Chs) -> + split_child(Name, Chs, []). + +split_child(Name, [Ch|Chs], After) when Ch#child.name =:= Name -> + {lists:reverse([Ch#child{pid = undefined} | After]), Chs}; +split_child(Pid, [Ch|Chs], After) when Ch#child.pid =:= Pid -> + {lists:reverse([Ch#child{pid = undefined} | After]), Chs}; +split_child(Name, [Ch|Chs], After) -> + split_child(Name, Chs, [Ch | After]); +split_child(_, [], After) -> + {lists:reverse(After), []}. + +get_child(Name, State) -> + lists:keysearch(Name, #child.name, State#state.children). +replace_child(Child, State) -> + Chs = do_replace_child(Child, State#state.children), + State#state{children = Chs}. + +do_replace_child(Child, [Ch|Chs]) when Ch#child.name =:= Child#child.name -> + [Child | Chs]; +do_replace_child(Child, [Ch|Chs]) -> + [Ch|do_replace_child(Child, Chs)]. + +remove_child(Child, State) -> + Chs = lists:keydelete(Child#child.name, #child.name, State#state.children), + State#state{children = Chs}. + +%%----------------------------------------------------------------- +%% Func: init_state/4 +%% Args: SupName = {local, atom()} | {global, atom()} | self +%% Type = {Strategy, MaxIntensity, Period} +%% Strategy = one_for_one | one_for_all | simple_one_for_one | +%% rest_for_one +%% MaxIntensity = integer() +%% Period = integer() +%% Mod :== atom() +%% Arsg :== term() +%% Purpose: Check that Type is of correct type (!) +%% Returns: {ok, #state} | Error +%%----------------------------------------------------------------- +init_state(SupName, Type, Mod, Args) -> + case catch init_state1(SupName, Type, Mod, Args) of + {ok, State} -> + {ok, State}; + Error -> + Error + end. + +init_state1(SupName, {Strategy, MaxIntensity, Period}, Mod, Args) -> + validStrategy(Strategy), + validIntensity(MaxIntensity), + validPeriod(Period), + {ok, #state{name = supname(SupName,Mod), + strategy = Strategy, + intensity = MaxIntensity, + period = Period, + module = Mod, + args = Args}}; +init_state1(_SupName, Type, _, _) -> + {invalid_type, Type}. + +validStrategy(simple_one_for_one_terminate) -> true; +validStrategy(simple_one_for_one) -> true; +validStrategy(one_for_one) -> true; +validStrategy(one_for_all) -> true; +validStrategy(rest_for_one) -> true; +validStrategy(What) -> throw({invalid_strategy, What}). + +validIntensity(Max) when is_integer(Max), + Max >= 0 -> true; +validIntensity(What) -> throw({invalid_intensity, What}). + +validPeriod(Period) when is_integer(Period), + Period > 0 -> true; +validPeriod(What) -> throw({invalid_period, What}). + +supname(self,Mod) -> {self(),Mod}; +supname(N,_) -> N. + +%%% ------------------------------------------------------ +%%% Check that the children start specification is valid. +%%% Shall be a six (6) tuple +%%% {Name, Func, RestartType, Shutdown, ChildType, Modules} +%%% where Name is an atom +%%% Func is {Mod, Fun, Args} == {atom, atom, list} +%%% RestartType is permanent | temporary | transient | +%%% intrinsic | {permanent, Delay} | +%%% {transient, Delay} | {intrinsic, Delay} +%% where Delay >= 0 +%%% Shutdown = integer() | infinity | brutal_kill +%%% ChildType = supervisor | worker +%%% Modules = [atom()] | dynamic +%%% Returns: {ok, [#child]} | Error +%%% ------------------------------------------------------ + +check_startspec(Children) -> check_startspec(Children, []). + +check_startspec([ChildSpec|T], Res) -> + case check_childspec(ChildSpec) of + {ok, Child} -> + case lists:keymember(Child#child.name, #child.name, Res) of + true -> {duplicate_child_name, Child#child.name}; + false -> check_startspec(T, [Child | Res]) + end; + Error -> Error + end; +check_startspec([], Res) -> + {ok, lists:reverse(Res)}. + +check_childspec({Name, Func, RestartType, Shutdown, ChildType, Mods}) -> + catch check_childspec(Name, Func, RestartType, Shutdown, ChildType, Mods); +check_childspec(X) -> {invalid_child_spec, X}. + +check_childspec(Name, Func, RestartType, Shutdown, ChildType, Mods) -> + validName(Name), + validFunc(Func), + validRestartType(RestartType), + validChildType(ChildType), + validShutdown(Shutdown, ChildType), + validMods(Mods), + {ok, #child{name = Name, mfa = Func, restart_type = RestartType, + shutdown = Shutdown, child_type = ChildType, modules = Mods}}. + +validChildType(supervisor) -> true; +validChildType(worker) -> true; +validChildType(What) -> throw({invalid_child_type, What}). + +validName(_Name) -> true. + +validFunc({M, F, A}) when is_atom(M), + is_atom(F), + is_list(A) -> true; +validFunc(Func) -> throw({invalid_mfa, Func}). + +validRestartType(permanent) -> true; +validRestartType(temporary) -> true; +validRestartType(transient) -> true; +validRestartType(intrinsic) -> true; +validRestartType({permanent, Delay}) -> validDelay(Delay); +validRestartType({intrinsic, Delay}) -> validDelay(Delay); +validRestartType({transient, Delay}) -> validDelay(Delay); +validRestartType(RestartType) -> throw({invalid_restart_type, + RestartType}). + +validDelay(Delay) when is_number(Delay), + Delay >= 0 -> true; +validDelay(What) -> throw({invalid_delay, What}). + +validShutdown(Shutdown, _) + when is_integer(Shutdown), Shutdown > 0 -> true; +validShutdown(infinity, supervisor) -> true; +validShutdown(brutal_kill, _) -> true; +validShutdown(Shutdown, _) -> throw({invalid_shutdown, Shutdown}). + +validMods(dynamic) -> true; +validMods(Mods) when is_list(Mods) -> + lists:foreach(fun (Mod) -> + if + is_atom(Mod) -> ok; + true -> throw({invalid_module, Mod}) + end + end, + Mods); +validMods(Mods) -> throw({invalid_modules, Mods}). + +%%% ------------------------------------------------------ +%%% Add a new restart and calculate if the max restart +%%% intensity has been reached (in that case the supervisor +%%% shall terminate). +%%% All restarts accured inside the period amount of seconds +%%% are kept in the #state.restarts list. +%%% Returns: {ok, State'} | {terminate, State'} +%%% ------------------------------------------------------ + +add_restart(State) -> + I = State#state.intensity, + P = State#state.period, + R = State#state.restarts, + Now = erlang:now(), + R1 = add_restart([Now|R], Now, P), + State1 = State#state{restarts = R1}, + case length(R1) of + CurI when CurI =< I -> + {ok, State1}; + _ -> + {terminate, State1} + end. + +add_restart([R|Restarts], Now, Period) -> + case inPeriod(R, Now, Period) of + true -> + [R|add_restart(Restarts, Now, Period)]; + _ -> + [] + end; +add_restart([], _, _) -> + []. + +inPeriod(Time, Now, Period) -> + case difference(Time, Now) of + T when T > Period -> + false; + _ -> + true + end. + +%% +%% Time = {MegaSecs, Secs, MicroSecs} (NOTE: MicroSecs is ignored) +%% Calculate the time elapsed in seconds between two timestamps. +%% If MegaSecs is equal just subtract Secs. +%% Else calculate the Mega difference and add the Secs difference, +%% note that Secs difference can be negative, e.g. +%% {827, 999999, 676} diff {828, 1, 653753} == > 2 secs. +%% +difference({TimeM, TimeS, _}, {CurM, CurS, _}) when CurM > TimeM -> + ((CurM - TimeM) * 1000000) + (CurS - TimeS); +difference({_, TimeS, _}, {_, CurS, _}) -> + CurS - TimeS. + +%%% ------------------------------------------------------ +%%% Error and progress reporting. +%%% ------------------------------------------------------ + +report_error(Error, Reason, Child, SupName) -> + ErrorMsg = [{supervisor, SupName}, + {errorContext, Error}, + {reason, Reason}, + {offender, extract_child(Child)}], + error_logger:error_report(supervisor_report, ErrorMsg). + +shutdown_error_reporter(SupName) -> + fun(Reason, Child) -> + report_error(shutdown_error, Reason, Child, SupName) + end. + +extract_child(Child) -> + [{pid, Child#child.pid}, + {name, Child#child.name}, + {mfa, Child#child.mfa}, + {restart_type, Child#child.restart_type}, + {shutdown, Child#child.shutdown}, + {child_type, Child#child.child_type}]. + +report_progress(Child, SupName) -> + Progress = [{supervisor, SupName}, + {started, extract_child(Child)}], + error_logger:info_report(progress, Progress). diff --git a/src/tcp_acceptor.erl b/src/tcp_acceptor.erl new file mode 100644 index 000000000..344196d70 --- /dev/null +++ b/src/tcp_acceptor.erl @@ -0,0 +1,89 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License +%% at http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and +%% limitations under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developer of the Original Code is VMware, Inc. +%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% + +-module(tcp_acceptor). + +-behaviour(gen_server). + +-export([start_link/2]). + +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, + terminate/2, code_change/3]). + +-record(state, {callback, sock, ref}). + +%%-------------------------------------------------------------------- + +start_link(Callback, LSock) -> + gen_server:start_link(?MODULE, {Callback, LSock}, []). + +%%-------------------------------------------------------------------- + +init({Callback, LSock}) -> + gen_server:cast(self(), accept), + {ok, #state{callback=Callback, sock=LSock}}. + +handle_call(_Request, _From, State) -> + {noreply, State}. + +handle_cast(accept, State) -> + ok = file_handle_cache:obtain(), + accept(State); + +handle_cast(_Msg, State) -> + {noreply, State}. + +handle_info({inet_async, LSock, Ref, {ok, Sock}}, + State = #state{callback={M,F,A}, sock=LSock, ref=Ref}) -> + + %% patch up the socket so it looks like one we got from + %% gen_tcp:accept/1 + {ok, Mod} = inet_db:lookup_socket(LSock), + inet_db:register_socket(Sock, Mod), + + %% handle + file_handle_cache:transfer(apply(M, F, A ++ [Sock])), + ok = file_handle_cache:obtain(), + + %% accept more + accept(State); + +handle_info({inet_async, LSock, Ref, {error, closed}}, + State=#state{sock=LSock, ref=Ref}) -> + %% It would be wrong to attempt to restart the acceptor when we + %% know this will fail. + {stop, normal, State}; + +handle_info({inet_async, LSock, Ref, {error, Reason}}, + State=#state{sock=LSock, ref=Ref}) -> + {stop, {accept_failed, Reason}, State}; + +handle_info(_Info, State) -> + {noreply, State}. + +terminate(_Reason, _State) -> + ok. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +%%-------------------------------------------------------------------- + +accept(State = #state{sock=LSock}) -> + case prim_inet:async_accept(LSock, -1) of + {ok, Ref} -> {noreply, State#state{ref=Ref}}; + Error -> {stop, {cannot_accept, Error}, State} + end. diff --git a/src/tcp_acceptor_sup.erl b/src/tcp_acceptor_sup.erl new file mode 100644 index 000000000..d8844441d --- /dev/null +++ b/src/tcp_acceptor_sup.erl @@ -0,0 +1,43 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License +%% at http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and +%% limitations under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developer of the Original Code is VMware, Inc. +%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% + +-module(tcp_acceptor_sup). + +-behaviour(supervisor). + +-export([start_link/2]). + +-export([init/1]). + +%%---------------------------------------------------------------------------- + +-ifdef(use_specs). + +-type(mfargs() :: {atom(), atom(), [any()]}). + +-spec(start_link/2 :: (atom(), mfargs()) -> rabbit_types:ok_pid_or_error()). + +-endif. + +%%---------------------------------------------------------------------------- + +start_link(Name, Callback) -> + supervisor:start_link({local,Name}, ?MODULE, Callback). + +init(Callback) -> + {ok, {{simple_one_for_one, 10, 10}, + [{tcp_acceptor, {tcp_acceptor, start_link, [Callback]}, + transient, brutal_kill, worker, [tcp_acceptor]}]}}. diff --git a/src/tcp_listener.erl b/src/tcp_listener.erl new file mode 100644 index 000000000..93b4934a3 --- /dev/null +++ b/src/tcp_listener.erl @@ -0,0 +1,113 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License +%% at http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and +%% limitations under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developer of the Original Code is VMware, Inc. +%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% + +-module(tcp_listener). + +-behaviour(gen_server). + +-export([start_link/8]). + +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, + terminate/2, code_change/3]). + +-record(state, {sock, on_startup, on_shutdown, label}). + +%%---------------------------------------------------------------------------- + +-ifdef(use_specs). + +-type(mfargs() :: {atom(), atom(), [any()]}). + +-spec(start_link/8 :: + (inet:ip_address(), inet:port_number(), [gen_tcp:listen_option()], + integer(), atom(), mfargs(), mfargs(), string()) -> + rabbit_types:ok_pid_or_error()). + +-endif. + +%%-------------------------------------------------------------------- + +start_link(IPAddress, Port, SocketOpts, + ConcurrentAcceptorCount, AcceptorSup, + OnStartup, OnShutdown, Label) -> + gen_server:start_link( + ?MODULE, {IPAddress, Port, SocketOpts, + ConcurrentAcceptorCount, AcceptorSup, + OnStartup, OnShutdown, Label}, []). + +%%-------------------------------------------------------------------- + +init({IPAddress, Port, SocketOpts, + ConcurrentAcceptorCount, AcceptorSup, + {M,F,A} = OnStartup, OnShutdown, Label}) -> + process_flag(trap_exit, true), + case gen_tcp:listen(Port, SocketOpts ++ [{ip, IPAddress}, + {active, false}]) of + {ok, LSock} -> + lists:foreach(fun (_) -> + {ok, _APid} = supervisor:start_child( + AcceptorSup, [LSock]) + end, + lists:duplicate(ConcurrentAcceptorCount, dummy)), + {ok, {LIPAddress, LPort}} = inet:sockname(LSock), + error_logger:info_msg( + "started ~s on ~s:~p~n", + [Label, ntoab(LIPAddress), LPort]), + apply(M, F, A ++ [IPAddress, Port]), + {ok, #state{sock = LSock, + on_startup = OnStartup, on_shutdown = OnShutdown, + label = Label}}; + {error, Reason} -> + error_logger:error_msg( + "failed to start ~s on ~s:~p - ~p (~s)~n", + [Label, ntoab(IPAddress), Port, + Reason, inet:format_error(Reason)]), + {stop, {cannot_listen, IPAddress, Port, Reason}} + end. + +handle_call(_Request, _From, State) -> + {noreply, State}. + +handle_cast(_Msg, State) -> + {noreply, State}. + +handle_info(_Info, State) -> + {noreply, State}. + +terminate(_Reason, #state{sock=LSock, on_shutdown = {M,F,A}, label=Label}) -> + {ok, {IPAddress, Port}} = inet:sockname(LSock), + gen_tcp:close(LSock), + error_logger:info_msg("stopped ~s on ~s:~p~n", + [Label, ntoab(IPAddress), Port]), + apply(M, F, A ++ [IPAddress, Port]). + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +%% Format IPv4-mapped IPv6 addresses as IPv4, since they're what we see +%% when IPv6 is enabled but not used (i.e. 99% of the time). +ntoa({0,0,0,0,0,16#ffff,AB,CD}) -> + inet_parse:ntoa({AB bsr 8, AB rem 256, CD bsr 8, CD rem 256}); +ntoa(IP) -> + inet_parse:ntoa(IP). + +ntoab(IP) -> + Str = ntoa(IP), + case string:str(Str, ":") of + 0 -> Str; + _ -> "[" ++ Str ++ "]" + end. + diff --git a/src/tcp_listener_sup.erl b/src/tcp_listener_sup.erl new file mode 100644 index 000000000..430507af9 --- /dev/null +++ b/src/tcp_listener_sup.erl @@ -0,0 +1,79 @@ +%% The contents of this file are subject to the Mozilla Public License +%% Version 1.1 (the "License"); you may not use this file except in +%% compliance with the License. You may obtain a copy of the License +%% at http://www.mozilla.org/MPL/ +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and +%% limitations under the License. +%% +%% The Original Code is RabbitMQ. +%% +%% The Initial Developer of the Original Code is VMware, Inc. +%% Copyright (c) 2007-2012 VMware, Inc. All rights reserved. +%% + +-module(tcp_listener_sup). + +-behaviour(supervisor). + +-export([start_link/7, start_link/8]). + +-export([init/1]). + +%%---------------------------------------------------------------------------- + +-ifdef(use_specs). + +-type(mfargs() :: {atom(), atom(), [any()]}). + +-spec(start_link/7 :: + (inet:ip_address(), inet:port_number(), [gen_tcp:listen_option()], + mfargs(), mfargs(), mfargs(), string()) -> + rabbit_types:ok_pid_or_error()). +-spec(start_link/8 :: + (inet:ip_address(), inet:port_number(), [gen_tcp:listen_option()], + mfargs(), mfargs(), mfargs(), integer(), string()) -> + rabbit_types:ok_pid_or_error()). + +-endif. + +%%---------------------------------------------------------------------------- + +start_link(IPAddress, Port, SocketOpts, OnStartup, OnShutdown, + AcceptCallback, Label) -> + start_link(IPAddress, Port, SocketOpts, OnStartup, OnShutdown, + AcceptCallback, 1, Label). + +start_link(IPAddress, Port, SocketOpts, OnStartup, OnShutdown, + AcceptCallback, ConcurrentAcceptorCount, Label) -> + supervisor:start_link( + ?MODULE, {IPAddress, Port, SocketOpts, OnStartup, OnShutdown, + AcceptCallback, ConcurrentAcceptorCount, Label}). + +init({IPAddress, Port, SocketOpts, OnStartup, OnShutdown, + AcceptCallback, ConcurrentAcceptorCount, Label}) -> + %% This is gross. The tcp_listener needs to know about the + %% tcp_acceptor_sup, and the only way I can think of accomplishing + %% that without jumping through hoops is to register the + %% tcp_acceptor_sup. + Name = tcp_name(tcp_acceptor_sup, IPAddress, Port), + {ok, {{one_for_all, 10, 10}, + [{tcp_acceptor_sup, {tcp_acceptor_sup, start_link, + [Name, AcceptCallback]}, + transient, infinity, supervisor, [tcp_acceptor_sup]}, + {tcp_listener, {tcp_listener, start_link, + [IPAddress, Port, SocketOpts, + ConcurrentAcceptorCount, Name, + OnStartup, OnShutdown, Label]}, + transient, 16#ffffffff, worker, [tcp_listener]}]}}. + +tcp_name(Prefix, IPAddress, Port) + when is_atom(Prefix) andalso is_number(Port) -> + list_to_atom( + lists:flatten( + io_lib:format( + "~w_~s:~w", [Prefix, inet_parse:ntoa(IPAddress), Port]))). + +