diff --git a/Makefile b/Makefile index b5f0361..f2e0ccd 100644 --- a/Makefile +++ b/Makefile @@ -17,7 +17,7 @@ OPTFLAGS = -O2 #-flto DEBUGFLAGS = -gdwarf-3 #-fsanitize=address WARNFLAGS = -Wall -Wnon-virtual-dtor -Woverloaded-virtual CPUFLAGS = #-march=core2 -mtune=corei7 -CXXFLAGS = -std=gnu++11 $(OPTFLAGS) $(DEBUGFLAGS) $(shell getconf LFS_CFLAGS) $(WARNFLAGS) $(CPUFLAGS) +CXXFLAGS = -std=gnu++17 $(OPTFLAGS) $(DEBUGFLAGS) $(shell getconf LFS_CFLAGS) $(WARNFLAGS) $(CPUFLAGS) LDFLAGS = -L. $(shell getconf LFS_LDFLAGS) LDLIBS = $(shell getconf LFS_LIBS) -lyrmcds $(LIBTCMALLOC) -latomic -lpthread diff --git a/etc/yrmcds.conf b/etc/yrmcds.conf index dea5f8b..7b83eab 100644 --- a/etc/yrmcds.conf +++ b/etc/yrmcds.conf @@ -6,7 +6,19 @@ user = nobody # setgid group group = nogroup -# To become the master, virtual_ip address must be owned. +# method of leader election. "virtual_ip" or "file". +# * virtual_ip: +# The node that owns the virtual_ip address becomes the master. +# "virtual_ip" must be set. +# "master_host" and "master_file" are ignored. +# * file: +# The node on which "master_file" exists becomes the master. +# "master_host" and "master_file" must be set. +# "virtual_ip" is ignored. +leader_election_method = "virtual_ip" + +# Specify the virtual_ip that the master owns. +# Used only when leader_election_method is "virtual_ip". virtual_ip = 127.0.0.1 # memcache protocol port number. @@ -15,6 +27,14 @@ port = 11211 # yrmcds replication protocol port number. repl_port = 11213 +# The host name used when a slave connects to the master. +# Used only when leader_election_method is "file". +master_host = localhost + +# The file that indicates that this node is the master. +# Used only when leader_election_method is "file". +master_file = /run/yrmcds/master + # max number of client connections. 0 means unlimited. 
max_connections = 10000 diff --git a/src/config.cpp b/src/config.cpp index 2b4c12f..8fc683b 100644 --- a/src/config.cpp +++ b/src/config.cpp @@ -9,9 +9,12 @@ namespace { +const char LEADER_ELECTION_METHOD[] = "leader_election_method"; const char VIRTUAL_IP[] = "virtual_ip"; const char PORT[] = "port"; const char REPL_PORT[] = "repl_port"; +const char MASTER_HOST[] = "master_host"; +const char MASTER_FILE[] = "master_file"; const char BIND_IP[] = "bind_ip"; const char MAX_CONNECTIONS[] = "max_connections"; const char TEMP_DIR[] = "temp_dir"; @@ -109,8 +112,26 @@ void counter_config::load(const cybozu::config_parser& cp) { void config::load(const std::string& path) { cybozu::config_parser cp(path); - if( cp.exists(VIRTUAL_IP) ) - m_vip.parse(cp.get(VIRTUAL_IP)); + if( cp.exists(LEADER_ELECTION_METHOD) ) { + auto& m = cp.get(LEADER_ELECTION_METHOD); + if( m == "virtual_ip" ) { + m_leader_election_method = leader_election_method::virtual_ip; + } else if( m == "file" ) { + m_leader_election_method = leader_election_method::file; + } else { + throw bad_config("Invalid leader election method: " + m); + } + } + + if( m_leader_election_method == leader_election_method::virtual_ip ) { + if( cp.exists(VIRTUAL_IP) ) { + cybozu::ip_address vip(cp.get(VIRTUAL_IP)); + m_vip = std::optional(vip); + } + } else { + // m_vip should have a value only when leader_election_method is virtual_ip. 
+ m_vip = std::nullopt; + } if( cp.exists(PORT) ) { int n = cp.get_as_int(PORT); @@ -126,6 +147,16 @@ void config::load(const std::string& path) { m_repl_port = static_cast(n); } + if( m_leader_election_method == leader_election_method::file ) { + if( cp.exists(MASTER_HOST) ) { + m_master_host = std::optional(cp.get(MASTER_HOST)); + } + + if( cp.exists(MASTER_FILE) ) { + m_master_file_path = std::optional(cp.get(MASTER_FILE)); + } + } + if( cp.exists(BIND_IP) ) { for( auto& s: cybozu::tokenize(cp.get(BIND_IP), ' ') ) { m_bind_ip.emplace_back(s); @@ -245,6 +276,25 @@ void config::load(const std::string& path) { } m_counter_config.load(cp); + + sanity_check(); +} + +void config::sanity_check() { + switch( m_leader_election_method ) { + case leader_election_method::virtual_ip: + if( !m_vip ) + throw bad_config("virtual_ip must be set when leader_election_method is 'virtual_ip'"); + break; + case leader_election_method::file: + if( !m_master_file_path ) + throw bad_config("master_file_path must be set when leader_election_method is 'file'"); + if( !m_master_host ) + throw bad_config("master_host must be set when leader_election_method is 'file'"); + break; + default: + throw bad_config("Invalid leader_election_method"); + } } config g_config; diff --git a/src/config.hpp b/src/config.hpp index e79fc17..5881c67 100644 --- a/src/config.hpp +++ b/src/config.hpp @@ -10,12 +10,17 @@ #include #include -#include #include +#include +#include #include namespace yrmcds { +enum class leader_election_method { + virtual_ip, file, +}; + // Configurations for counter extension. class counter_config { public: @@ -52,7 +57,7 @@ class counter_config { class config { public: // Setup default configurations. - config(): m_vip("127.0.0.1"), m_tempdir(DEFAULT_TMPDIR) { + config() { static_assert( sizeof(std::size_t) >= 4, "std::size_t is too small" ); } @@ -66,15 +71,33 @@ class config { // This may throw miscellaneous exceptions. 
void load(const std::string& path); - const cybozu::ip_address& vip() const noexcept { + yrmcds::leader_election_method leader_election_method() const noexcept { + return m_leader_election_method; + } + const std::optional& vip() const noexcept { return m_vip; } + // Returns the address of the master server. + // If the leader election method is virtual_ip, this returns "virtual_ip". + // Otherwise, this returns "master_host". + std::string master_host() const { + if( m_master_host ) { + return *m_master_host; + } else if( m_vip ) { + return m_vip->str(); + } else { + throw bad_config("[bug] both of vip and master_host are not set"); + } + } std::uint16_t port() const noexcept { return m_port; } std::uint16_t repl_port() const noexcept { return m_repl_port; } + const std::optional& master_file_path() { + return m_master_file_path; + } const std::vector& bind_ip() const noexcept { return m_bind_ip; } @@ -139,13 +162,18 @@ class config { } private: + void sanity_check(); + alignas(CACHELINE_SIZE) - cybozu::ip_address m_vip; + yrmcds::leader_election_method m_leader_election_method = yrmcds::leader_election_method::virtual_ip; + std::optional m_vip = std::optional(cybozu::ip_address("127.0.0.1")); + std::optional m_master_host; + std::optional m_master_file_path; std::uint16_t m_port = DEFAULT_MEMCACHE_PORT; std::uint16_t m_repl_port = DEFAULT_REPL_PORT; std::vector m_bind_ip; unsigned int m_max_connections = 0; - std::string m_tempdir; + std::string m_tempdir = DEFAULT_TMPDIR; std::string m_user; std::string m_group; cybozu::severity m_threshold = cybozu::severity::info; diff --git a/src/constants.hpp b/src/constants.hpp index e157cc4..22b0201 100644 --- a/src/constants.hpp +++ b/src/constants.hpp @@ -38,4 +38,10 @@ const char VERSION[] = "yrmcds version 1.1.11"; } // namespace yrmcds +// Define CACHELINE_SIZE if it is not defined to prevent IDE from throwing an error +// The actual value used is written in Makefile. 
+#ifndef CACHELINE_SIZE +#define CACHELINE_SIZE 32 +#endif + #endif // YRMCDS_CONSTANTS_HPP diff --git a/src/counter/handler.cpp b/src/counter/handler.cpp index dfb3581..c7d882c 100644 --- a/src/counter/handler.cpp +++ b/src/counter/handler.cpp @@ -51,9 +51,12 @@ void handler::on_master_start() { make_server_socket(nullptr, port, w), cybozu::reactor::EVENT_IN); } else { - m_reactor.add_resource( - make_server_socket(g_config.vip(), port, w, true), - cybozu::reactor::EVENT_IN); + if( g_config.vip() ) { + auto vip = g_config.vip()->str(); + m_reactor.add_resource( + make_server_socket(vip.c_str(), port, w, true), + cybozu::reactor::EVENT_IN); + } for( auto& s: g_config.bind_ip() ) { m_reactor.add_resource( make_server_socket(s, port, w), @@ -83,6 +86,11 @@ void handler::on_master_end() { m_gc_thread = nullptr; // join } +bool handler::on_slave_start() { + clear(); + return true; +} + void handler::dump_stats() { std::uint64_t ops = 0; for( auto& v: g_stats.ops ) { diff --git a/src/counter/handler.hpp b/src/counter/handler.hpp index 107dd53..158ba14 100644 --- a/src/counter/handler.hpp +++ b/src/counter/handler.hpp @@ -24,10 +24,11 @@ class handler: public protocol_handler { virtual void on_master_start() override; virtual void on_master_interval() override; virtual void on_master_end() override; + virtual bool on_slave_start() override; virtual void dump_stats() override; - virtual void clear() override; private: + void clear(); bool gc_ready(); std::unique_ptr make_counter_socket(int s); diff --git a/src/election.cpp b/src/election.cpp new file mode 100644 index 0000000..8606056 --- /dev/null +++ b/src/election.cpp @@ -0,0 +1,25 @@ +// (C) 2024 Cybozu. 
+ +#include "election.hpp" +#include "config.hpp" + +#include + +namespace fs = std::filesystem; + +namespace yrmcds { + +bool is_master() { + auto method = g_config.leader_election_method(); + if( method == leader_election_method::virtual_ip ) { + auto vip = g_config.vip(); + return cybozu::has_ip_address(*vip); + } else if( method == leader_election_method::file ) { + auto& path = *g_config.master_file_path(); + return fs::exists(path); + } else { + throw std::runtime_error("Invalid leader_election_method"); + } +} + +} // namespace yrmcds diff --git a/src/election.hpp b/src/election.hpp new file mode 100644 index 0000000..559317f --- /dev/null +++ b/src/election.hpp @@ -0,0 +1,13 @@ +// Leader election. +// (C) 2024 Cybozu. + +#ifndef YRMCDS_ELECTION_HPP +#define YRMCDS_ELECTION_HPP + +namespace yrmcds { + +bool is_master(); + +} // namespace yrmcds + +#endif diff --git a/src/handler.hpp b/src/handler.hpp index c0d66fc..33e8d3e 100644 --- a/src/handler.hpp +++ b/src/handler.hpp @@ -45,9 +45,6 @@ class protocol_handler { // Implementations should use `cybozu::logger::info()` to emit stats. virtual void dump_stats() = 0; - // Called when the server discards all stored data. - virtual void clear() {} - // If this protocol handler is ready for the reactor GC, // returns true. Otherwise, return false. virtual bool reactor_gc_ready() const { return true; } diff --git a/src/memcache/gc.cpp b/src/memcache/gc.cpp index 31fcbc3..4fc7683 100644 --- a/src/memcache/gc.cpp +++ b/src/memcache/gc.cpp @@ -1,6 +1,7 @@ // (C) 2013 Cybozu. #include "../config.hpp" +#include "../election.hpp" #include "gc.hpp" #include "replication.hpp" #include "stats.hpp" @@ -11,8 +12,8 @@ namespace yrmcds { namespace memcache { void gc_thread::run() { - if( ! cybozu::has_ip_address(g_config.vip()) ) { - cybozu::logger::error() << "VIP has been lost. Exiting quickly..."; + if( ! yrmcds::is_master() ) { + cybozu::logger::error() << "I am no longer the master. 
Exiting quickly..."; std::quick_exit(2); } diff --git a/src/memcache/handler.cpp b/src/memcache/handler.cpp index 5d47d05..88acda1 100644 --- a/src/memcache/handler.cpp +++ b/src/memcache/handler.cpp @@ -73,9 +73,12 @@ void handler::on_start() { make_server_socket(nullptr, g_config.port(), w), cybozu::reactor::EVENT_IN); } else { - m_reactor.add_resource( - make_server_socket(g_config.vip(), g_config.port(), w, true), - cybozu::reactor::EVENT_IN); + if( g_config.vip() ) { + auto vip = g_config.vip()->str(); + m_reactor.add_resource( + make_server_socket(vip.c_str(), g_config.port(), w, true), + cybozu::reactor::EVENT_IN); + } for( auto& s: g_config.bind_ip() ) { m_reactor.add_resource( make_server_socket(s, g_config.port(), w), @@ -95,8 +98,9 @@ void handler::on_master_start() { make_server_socket(nullptr, g_config.repl_port(), w), cybozu::reactor::EVENT_IN); } else { + auto master_host = g_config.master_host(); m_reactor.add_resource( - make_server_socket(g_config.vip(), g_config.repl_port(), w, true), + make_server_socket(master_host.c_str(), g_config.repl_port(), w, true), cybozu::reactor::EVENT_IN); for( auto& s: g_config.bind_ip() ) { m_reactor.add_resource( @@ -145,12 +149,27 @@ void handler::on_master_end() { } bool handler::on_slave_start() { - int fd = cybozu::tcp_connect(g_config.vip().str().c_str(), - g_config.repl_port()); + using logger = cybozu::logger; + + auto master_host = g_config.master_host(); + int fd; + try { + fd = cybozu::tcp_connect(master_host.c_str(), g_config.repl_port()); + } catch( std::runtime_error& err ) { + logger::error() << "Failed to connect to the master (" << master_host << "): " << err.what(); + m_reactor.run_once(); + return false; + } if( fd == -1 ) { + logger::error() << "Failed to connect to the master (" << master_host << ")"; m_reactor.run_once(); return false; } + + // on_slave_start may be called multiple times over the lifetime. + // Therefore we need to clear the hash table. 
+ clear(); + m_repl_client_socket = new repl_client_socket(fd, m_hash); m_reactor.add_resource(std::unique_ptr(m_repl_client_socket), cybozu::reactor::EVENT_IN|cybozu::reactor::EVENT_OUT ); diff --git a/src/memcache/handler.hpp b/src/memcache/handler.hpp index 26d4e0f..e46ce2e 100644 --- a/src/memcache/handler.hpp +++ b/src/memcache/handler.hpp @@ -31,10 +31,10 @@ class handler: public protocol_handler { virtual void on_slave_end() override; virtual void on_slave_interval() override; virtual void dump_stats() override; - virtual void clear() override; virtual bool reactor_gc_ready() const override; private: + void clear(); bool gc_ready(std::time_t now); std::unique_ptr make_memcache_socket(int s); std::unique_ptr make_repl_socket(int s); diff --git a/src/memcache/memcache.cpp b/src/memcache/memcache.cpp index cbadd86..9e240a9 100644 --- a/src/memcache/memcache.cpp +++ b/src/memcache/memcache.cpp @@ -601,7 +601,7 @@ void text_response::stats_settings() { os << "STAT maxbytes " << g_config.memory_limit() << CRLF; os << "STAT tcpport " << g_config.port() << CRLF; os << "STAT replport " << g_config.repl_port() << CRLF; - os << "STAT virtual_ip " << g_config.vip().str() << CRLF; + os << "STAT virtual_ip " << g_config.vip().value_or(cybozu::ip_address()).str() << CRLF; os << "STAT evictions on" << CRLF; os << "STAT cas_enabled on" << CRLF; os << "STAT locking on" << CRLF; @@ -1078,7 +1078,7 @@ binary_response::stats_settings() { send_stat("maxbytes", std::to_string(g_config.memory_limit())); send_stat("tcpport", std::to_string(g_config.port())); send_stat("replport", std::to_string(g_config.repl_port())); - send_stat("virtual_ip", g_config.vip().str()); + send_stat("virtual_ip", g_config.vip().value_or(cybozu::ip_address()).str()); send_stat("evictions", "on"); send_stat("cas_enabled", "on"); send_stat("locking", "on"); diff --git a/src/server.cpp b/src/server.cpp index c87d4b3..d175cc6 100644 --- a/src/server.cpp +++ b/src/server.cpp @@ -1,6 +1,7 @@ // (C) 2013-2014 
Cybozu. #include "constants.hpp" +#include "election.hpp" #include "memcache/handler.hpp" #include "counter/handler.hpp" #include "server.hpp" @@ -70,22 +71,17 @@ void server::serve() { for( auto& handler: m_handlers ) handler->on_start(); - if( is_master() ) + if( yrmcds::is_master() ) goto MASTER_ENTRY; while( true ) { - for( auto& handler: m_handlers ) - handler->clear(); - serve_slave(); if( m_signaled ) return; // disconnected from the master + cybozu::logger::info() << "Try to promote to master..."; for( int i = 0; i < MASTER_CHECKS; ++i ) { - if( is_master() ) + if( yrmcds::is_master() ) goto MASTER_ENTRY; - cybozu::logger::info() - << "The conditions for becoming master are not met. Sleep and retry... (" - << i << "/" << MASTER_CHECKS << ")"; std::this_thread::sleep_for( std::chrono::milliseconds(100) ); } cybozu::logger::warning() << "Could not promote to master. Join the cluster again as a slave."; @@ -97,11 +93,14 @@ void server::serve() { } void server::serve_slave() { + cybozu::logger::info() << "Entering slave mode"; + for( auto it1 = m_handlers.begin(); it1 != m_handlers.end(); ++it1 ) { if( ! (*it1)->on_slave_start() ) { // failed to start. stop already started handlers. for( auto it2 = m_handlers.begin(); it2 != it1; ++it2 ) (*it2)->on_slave_end(); + cybozu::logger::error() << "Failed to start handler."; return; } } @@ -109,7 +108,7 @@ void server::serve_slave() { cybozu::logger::info() << "Slave start"; m_reactor.run([this](cybozu::reactor& r) { - if( is_master() ) { + if( yrmcds::is_master() ) { cybozu::logger::info() << "Detected that this node is eligible to be master. Exit slave mode."; r.quit(); return; diff --git a/src/server.hpp b/src/server.hpp index 1400484..d1b0f34 100644 --- a/src/server.hpp +++ b/src/server.hpp @@ -22,11 +22,6 @@ namespace yrmcds { class server { public: server(); - - static bool is_master() { - return cybozu::has_ip_address( g_config.vip() ); - } - void serve(); private: