diff --git a/.changelog/unreleased/bug-fixes/4245-fix-tm-mode.md b/.changelog/unreleased/bug-fixes/4245-fix-tm-mode.md new file mode 100644 index 0000000000..0b33019c34 --- /dev/null +++ b/.changelog/unreleased/bug-fixes/4245-fix-tm-mode.md @@ -0,0 +1,5 @@ +- Fixed running CometBFT as a validator when the Namada config `tendermint_mode` + is set to a non-validator mode. When the `tendermint_mode` changes + from a validator to a non-validator mode, the node will back up and + replace the validator consensus key and state in the CometBFT directory. + ([\#4245](https://github.com/anoma/namada/pull/4245)) \ No newline at end of file diff --git a/crates/apps/src/bin/namada-node/cli.rs b/crates/apps/src/bin/namada-node/cli.rs index a13657fcac..1e072a971a 100644 --- a/crates/apps/src/bin/namada-node/cli.rs +++ b/crates/apps/src/bin/namada-node/cli.rs @@ -28,11 +28,7 @@ pub fn main() -> Result<()> { ); ScheduledMigration::from_path(p, hash, height).unwrap() }); - node::run( - chain_ctx.config.ledger, - wasm_dir, - scheduled_migration, - ); + node::run(chain_ctx.config, wasm_dir, scheduled_migration); } cmds::Ledger::RunUntil(cmds::LedgerRunUntil(args)) => { let mut chain_ctx = ctx.take_chain_or_exit(); @@ -40,7 +36,7 @@ pub fn main() -> Result<()> { sleep_until(args.time); chain_ctx.config.ledger.shell.action_at_height = Some(args.action_at_height); - node::run(chain_ctx.config.ledger, wasm_dir, None); + node::run(chain_ctx.config, wasm_dir, None); } cmds::Ledger::Reset(_) => { let chain_ctx = ctx.take_chain_or_exit(); diff --git a/crates/apps_lib/src/config/mod.rs b/crates/apps_lib/src/config/mod.rs index ba04f8bdb2..8490a075a7 100644 --- a/crates/apps_lib/src/config/mod.rs +++ b/crates/apps_lib/src/config/mod.rs @@ -51,7 +51,7 @@ pub struct NodeLocalConfig { pub recheck_process_proposal: bool, } -#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)] +#[derive(Clone, Copy, Debug, Serialize, Deserialize, PartialEq, Eq)] pub enum TendermintMode { Full, Validator, @@ 
-101,7 +101,6 @@ pub struct Ledger { #[derive(Clone, Debug, Serialize, Deserialize)] pub struct Shell { pub base_dir: PathBuf, - // pub ledger_address: SocketAddr, /// RocksDB block cache maximum size in bytes. /// When not set, defaults to 1/3 of the available memory. pub block_cache_bytes: Option, @@ -122,6 +121,8 @@ pub struct Shell { pub action_at_height: Option, /// Specify if tendermint is started as validator, fullnode or seednode pub tendermint_mode: TendermintMode, + /// A `tendermint_mode` set on the last node start-up, if any. + pub last_tendermint_mode: Option, /// When set, indicates after how many blocks a new snapshot /// will be taken (counting from the first block) pub blocks_between_snapshots: Option, @@ -155,6 +156,7 @@ impl Ledger { cometbft_dir: COMETBFT_DIR.into(), action_at_height: None, tendermint_mode: mode, + last_tendermint_mode: None, blocks_between_snapshots: None, snapshots_to_keep: None, }, diff --git a/crates/apps_lib/src/tendermint_node.rs b/crates/apps_lib/src/tendermint_node.rs index 6019c990cd..4ddf141780 100644 --- a/crates/apps_lib/src/tendermint_node.rs +++ b/crates/apps_lib/src/tendermint_node.rs @@ -1,7 +1,9 @@ +use std::fs; use std::path::{Path, PathBuf}; use namada_sdk::borsh::BorshSerializeExt; use namada_sdk::key::*; +use rand_core::OsRng; use serde_json::json; use sha2::{Digest, Sha256}; use thiserror::Error; @@ -98,6 +100,32 @@ pub fn write_validator_state(home_dir: impl AsRef) -> Result<()> { write_validator(validator_state(home_dir), STATE_DIR, STATE_FILE, state) } +/// Move the current validator key and state into a backup file (adds ".bk" +/// extension), if any. 
+pub fn backup_validator_key_and_state(home_dir: impl AsRef) { + let key_path = validator_key(&home_dir); + let state_path = validator_state(&home_dir); + for path in [key_path, state_path] { + if path.exists() { + fs::rename(&path, path.with_extension("json.bk")).expect( + "Must be able to backup existing CometBFT validator key and \ + state", + ); + } + } +} + +/// Write a randomly generated dummy ed25519 validator key and empty state file. +pub fn write_dummy_validator_key_and_state(home_dir: impl AsRef) { + let dummy_key: common::SecretKey = ed25519::SigScheme::generate(&mut OsRng) + .try_to_sk() + .unwrap(); + write_validator_key(&home_dir, &dummy_key) + .expect("Must be able to write dummy validator key."); + write_validator_state(&home_dir) + .expect("Must be able to write dummy validator state."); +} + /// Abstract over the initialization of validator data for Tendermint pub fn write_validator( path: PathBuf, @@ -107,7 +135,7 @@ pub fn write_validator( ) -> Result<()> { let parent_dir = path.parent().unwrap(); // Make sure the dir exists - std::fs::create_dir_all(parent_dir).map_err(|err| { + fs::create_dir_all(parent_dir).map_err(|err| { Error::CantCreate(format!( "{} at {}. 
Caused by {err}", err_dir, @@ -152,8 +180,8 @@ pub fn id_from_pk(pk: &common::PublicKey) -> TendermintNodeId { TendermintNodeId::new(bytes) } -fn ensure_empty(path: &PathBuf) -> std::io::Result { - std::fs::OpenOptions::new() +fn ensure_empty(path: &PathBuf) -> std::io::Result { + fs::OpenOptions::new() .create(true) .write(true) .truncate(true) diff --git a/crates/node/src/lib.rs b/crates/node/src/lib.rs index 65db933767..90684eed66 100644 --- a/crates/node/src/lib.rs +++ b/crates/node/src/lib.rs @@ -284,10 +284,12 @@ fn emit_warning_on_non_64bit_cpu() { /// Run the ledger with an async runtime pub fn run( - config: config::Ledger, + config: config::Config, wasm_dir: PathBuf, scheduled_migration: Option, ) { + handle_tendermint_mode_change(&config); + emit_warning_on_non_64bit_cpu(); let logical_cores = num_cpus::get(); @@ -323,7 +325,54 @@ pub fn run( .enable_all() .build() .unwrap() - .block_on(run_aux(config, wasm_dir, scheduled_migration)); + .block_on(run_aux(config.ledger, wasm_dir, scheduled_migration)); +} + +/// Check if the `tendermint_mode` has changed from validator to non-validator +/// mode, in which case we back up and replace the validator keys and state to +/// avoid CometBFT running as a validator. We also persist the +/// `last_tendermint_mode` in the config for the next run. 
+fn handle_tendermint_mode_change(config: &config::Config) { + // Check if the node was previously run as a Validator, but isn't anymore + if !matches!( + config.ledger.shell.tendermint_mode, + TendermintMode::Validator + ) && matches!( + config.ledger.shell.last_tendermint_mode, + Some(TendermintMode::Validator) + ) { + // Back up and replace CometBFT validator key and state + let cometbft_dir = config.ledger.cometbft_dir(); + namada_apps_lib::tendermint_node::backup_validator_key_and_state( + &cometbft_dir, + ); + namada_apps_lib::tendermint_node::write_dummy_validator_key_and_state( + &cometbft_dir, + ); + } + + if config.ledger.shell.last_tendermint_mode.is_none() + || config.ledger.shell.last_tendermint_mode + != Some(config.ledger.shell.tendermint_mode) + { + let mut config = config.clone(); + config.ledger.shell.last_tendermint_mode = + Some(config.ledger.shell.tendermint_mode); + // Remove this field in case it's set from running `ledger run-until` - + // it shouldn't be persisted + config.ledger.shell.action_at_height = None; + let replace = true; + config + .write( + &config.ledger.shell.base_dir, + &config.ledger.chain_id, + replace, + ) + .expect( + "Must be able to persist config with changed \ + `last_tendermint_mode`.", + ); + } } /// Resets the tendermint_node state and removes database files @@ -600,7 +649,7 @@ fn start_abci_broadcaster_shell( ); // Construct our ABCI application. - let tendermint_mode = config.shell.tendermint_mode.clone(); + let tendermint_mode = config.shell.tendermint_mode; let proxy_app_address = convert_tm_addr_to_socket_addr(&config.cometbft.proxy_app);