From 466f909a8272719818dc3b5626d2edf2891cde36 Mon Sep 17 00:00:00 2001 From: Nick Thompson Date: Mon, 8 Jan 2024 17:18:29 -0800 Subject: [PATCH] Fix race conditions in differential evolution Through a combination of silly mistakes, I missed a pile of race conditions in the OpenMP threading. Switch to C++ threading. Note that this change requires serial generation of trial vectors. Hopefully I can figure out to parallelize the generation of trial vectors to reduce the serial section a la Ahmdahl's law, while simultaneously keeping thread sanitizer happy. --- .gitignore | 1 + README.md | 6 +- doc/math.qbk | 3 + .../differential_evolution.qbk | 20 +- doc/roots/roots_overview.qbk | 1 - example/differential_evolution.cpp | 6 +- .../boost/math/optimization/detail/common.hpp | 165 +++++++++ .../optimization/differential_evolution.hpp | 231 ++++++++++++ .../math/tools/differential_evolution.hpp | 329 ------------------ test/differential_evolution_test.cpp | 105 +++--- test/test_functions_for_optimization.hpp | 77 ++++ 11 files changed, 549 insertions(+), 395 deletions(-) rename doc/{roots => optimization}/differential_evolution.qbk (88%) create mode 100644 include/boost/math/optimization/detail/common.hpp create mode 100644 include/boost/math/optimization/differential_evolution.hpp delete mode 100644 include/boost/math/tools/differential_evolution.hpp create mode 100644 test/test_functions_for_optimization.hpp diff --git a/.gitignore b/.gitignore index 5ff21efb64..6435addffd 100644 --- a/.gitignore +++ b/.gitignore @@ -27,3 +27,4 @@ cmake-build-debug/* .cmake/* build.ninja .ninja* +a.out diff --git a/README.md b/README.md index 07c6c4604b..b795613c46 100644 --- a/README.md +++ b/README.md @@ -38,11 +38,13 @@ All the implementations are fully generic and support the use of arbitrary "real These functions also provide the basis of support for the TR1 special functions. 
-### Root Finding and Function Minimisation +### Root Finding A comprehensive set of root-finding algorithms over the real line, both with derivatives and derivative free. -Also function minimisation via Brent's Method. +### Optimization + +Minimization of cost functions via Brent's method and differential evolution. ### Polynomials and Rational Functions diff --git a/doc/math.qbk b/doc/math.qbk index f4e80a39a7..ddefdd1586 100644 --- a/doc/math.qbk +++ b/doc/math.qbk @@ -724,6 +724,9 @@ and as a CD ISBN 0-9504833-2-X 978-0-9504833-2-0, Classification 519.2-dc22. [mathpart root_finding Root Finding \& Minimization Algorithms] [include roots/roots_overview.qbk] [endmathpart] [/mathpart roots Root Finding Algorithms] +[mathpart optimization Optimization] +[include optimization/differential_evolution.qbk] +[endmathpart] [/mathpart optimization Optimization] [mathpart poly Polynomials and Rational Functions] [include internals/polynomial.qbk] diff --git a/doc/roots/differential_evolution.qbk b/doc/optimization/differential_evolution.qbk similarity index 88% rename from doc/roots/differential_evolution.qbk rename to doc/optimization/differential_evolution.qbk index 23e0e32aa3..78edcda662 100644 --- a/doc/roots/differential_evolution.qbk +++ b/doc/optimization/differential_evolution.qbk @@ -10,16 +10,16 @@ LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) [heading Synopsis] `` - #include + #include - namespace boost::math::tools { + namespace boost::math::optimization { template struct differential_evolution_parameters { using Real = typename ArgumentContainer::value_type; ArgumentContainer lower_bounds; ArgumentContainer upper_bounds; - Real F = static_cast(0.65); - double crossover_ratio = 0.5; + Real mutation_factor = static_cast(0.65); + double crossover_probability = 0.5; // Population in each generation: size_t NP = 200; size_t max_generations = 1000; @@ -47,8 +47,8 @@ We justify this design choice by reference to the "No free lunch" theorem of Wol 
`lower_bounds`: A container representing the lower bounds of the optimization space along each dimension. The `.size()` of the bounds should return the dimension of the problem. `upper_bounds`: A container representing the upper bounds of the optimization space along each dimension. It should have the same size of `lower_bounds`, and each element should be >= the corresponding element of `lower_bounds`. - `F`: The scale factor controlling the rate at which the population evolves (default is 0.65). - `crossover_ratio`: The crossover ratio determining the trade-off between exploration and exploitation (default is 0.5). + `mutation_factor`: The F or scale factor controlling the rate at which the population evolves (default is 0.65). + `crossover_probability`: The crossover probability determining the trade-off between exploration and exploitation (default is 0.5). `NP`: The population size (default is 200). Parallelization occurs over the population, so this should be "large". `max_generations`: The maximum number of generations for the optimization (default is 1000). `threads`: The number of threads to use for parallelization (default is the hardware concurrency). If the objective function is already multithreaded, then this should be set to 1 to prevent oversubscription. @@ -64,7 +64,7 @@ The most robust way of decreasing the probability of getting stuck in a local mi template ArgumentContainer differential_evolution(const Func cost_function, differential_evolution_parameters const &de_params, - URBG &g, + URBG &gen, std::invoke_result_t value_to_reach = std::numeric_limits>::quiet_NaN(), std::atomic *cancellation = nullptr, std::vector>> *queries = nullptr, @@ -102,6 +102,12 @@ Supported termination criteria are explicit requests for termination, value-to-r Price, Storn, and Lampinen, Section 2.8 also list population statistics and lack of accepted trials over many generations as sensible termination criteria. These could be supported if there is demand. 
+Parallelization with `std::thread` does have overhead—especially for very fast function calls. +We found that the function call needs to be roughly a microsecond for unambiguous parallelization wins. +However, we have not provided conditional parallelization, as computationally inexpensive cost functions are the exception, not the rule.
+/* + * Copyright Nick Thompson, 2024 + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + */ +#ifndef BOOST_MATH_OPTIMIZATION_DETAIL_COMMON_HPP +#define BOOST_MATH_OPTIMIZATION_DETAIL_COMMON_HPP +#include +#include +#include +#include +#include +#include + +namespace boost::math::optimization::detail { + +template struct has_resize : std::false_type {}; + +template +struct has_resize().resize(size_t{}))>> : std::true_type {}; + +template constexpr bool has_resize_v = has_resize::value; + +template +void validate_bounds(ArgumentContainer const &lower_bounds, ArgumentContainer const &upper_bounds) { + using std::isfinite; + std::ostringstream oss; + if (lower_bounds.size() == 0) { + oss << __FILE__ << ":" << __LINE__ << ":" << __func__; + oss << ": The dimension of the problem cannot be zero."; + throw std::domain_error(oss.str()); + } + if (upper_bounds.size() != lower_bounds.size()) { + oss << __FILE__ << ":" << __LINE__ << ":" << __func__; + oss << ": There must be the same number of lower bounds as upper bounds, but given "; + oss << upper_bounds.size() << " upper bounds, and " << lower_bounds.size() << " lower bounds."; + throw std::domain_error(oss.str()); + } + for (size_t i = 0; i < lower_bounds.size(); ++i) { + auto lb = lower_bounds[i]; + auto ub = upper_bounds[i]; + if (lb > ub) { + oss << __FILE__ << ":" << __LINE__ << ":" << __func__; + oss << ": The upper bound must be greater than or equal to the lower bound, but the upper bound is " << ub + << " and the lower is " << lb << "."; + throw std::domain_error(oss.str()); + } + if (!isfinite(lb)) { + oss << __FILE__ << ":" << __LINE__ << ":" << __func__; + oss << ": The lower bound must be finite, but got " << lb << "."; + oss << " For infinite bounds, emulate with std::numeric_limits::lower() or use a standard infinite->finite " + "transform."; + throw 
std::domain_error(oss.str()); + } + if (!isfinite(ub)) { + oss << __FILE__ << ":" << __LINE__ << ":" << __func__; + oss << ": The upper bound must be finite, but got " << ub << "."; + oss << " For infinite bounds, emulate with std::numeric_limits::max() or use a standard infinite->finite " + "transform."; + throw std::domain_error(oss.str()); + } + } +} + +template +std::vector random_initial_population(ArgumentContainer const &lower_bounds, + ArgumentContainer const &upper_bounds, + size_t initial_population_size, URBG &&gen) { + using Real = typename ArgumentContainer::value_type; + constexpr bool has_resize = detail::has_resize_v; + std::vector population(initial_population_size); + auto const dimension = lower_bounds.size(); + for (size_t i = 0; i < population.size(); ++i) { + if constexpr (has_resize) { + population[i].resize(dimension); + } else { + // Argument type must be known at compile-time; like std::array: + if (population[i].size() != dimension) { + std::ostringstream oss; + oss << __FILE__ << ":" << __LINE__ << ":" << __func__; + oss << ": For containers which do not have resize, the default size must be the same as the dimension, "; + oss << "but the default container size is " << population[i].size() << " and the dimension of the problem is " + << dimension << "."; + oss << " The function argument container type is " << typeid(ArgumentContainer).name() << ".\n"; + throw std::runtime_error(oss.str()); + } + } + } + + // Why don't we provide an option to initialize with (say) a Gaussian distribution? + // > If the optimum's location is fairly well known, + // > a Gaussian distribution may prove somewhat faster, although it + // > may also increase the probability that the population will converge prematurely. + // > In general, uniform distributions are preferred, since they best reflect + // > the lack of knowledge about the optimum's location. 
+ // - Differential Evolution: A Practical Approach to Global Optimization + // That said, scipy uses Latin Hypercube sampling and says self-avoiding sequences are preferable. + // So this is something that could be investigated and potentially improved. + using std::uniform_real_distribution; + uniform_real_distribution dis(Real(0), Real(1)); + for (size_t i = 0; i < population.size(); ++i) { + for (size_t j = 0; j < dimension; ++j) { + auto const &lb = lower_bounds[j]; + auto const &ub = upper_bounds[j]; + population[i][j] = lb + dis(gen) * (ub - lb); + } + } + + return population; +} + +template +void validate_initial_guess(ArgumentContainer const &initial_guess, ArgumentContainer const &lower_bounds, + ArgumentContainer const &upper_bounds) { + std::ostringstream oss; + auto const dimension = lower_bounds.size(); + if (initial_guess.size() != dimension) { + oss << __FILE__ << ":" << __LINE__ << ":" << __func__; + oss << ": The initial guess must have the same dimensions as the problem,"; + oss << ", but the problem size is " << dimension << " and the initial guess has " << initial_guess.size() + << " elements."; + throw std::domain_error(oss.str()); + } + for (size_t i = 0; i < dimension; ++i) { + auto lb = lower_bounds[i]; + auto ub = upper_bounds[i]; + if (!isfinite(initial_guess[i])) { + oss << __FILE__ << ":" << __LINE__ << ":" << __func__; + oss << ": At index " << i << ", the initial guess is " << initial_guess[i] + << ", make sure all elements of the initial guess are finite."; + throw std::domain_error(oss.str()); + } + if (initial_guess[i] < lb || initial_guess[i] > ub) { + oss << __FILE__ << ":" << __LINE__ << ":" << __func__; + oss << ": At index " << i << " the initial guess " << initial_guess[i] << " is not in the bounds [" << lb << ", " + << ub << "]."; + throw std::domain_error(oss.str()); + } + } +} + +// Return indices corresponding to the minimum function values. 
+template std::vector best_indices(std::vector const &function_values) { + using std::isnan; + const size_t n = function_values.size(); + std::vector indices(n); + for (size_t i = 0; i < n; ++i) { + indices[i] = i; + } + + std::sort(indices.begin(), indices.end(), [&](size_t a, size_t b) { + if (isnan(function_values[a])) { + return false; + } + if (isnan(function_values[b])) { + return true; + } + return function_values[a] < function_values[b]; + }); + return indices; +} + +} // namespace boost::math::optimization::detail +#endif \ No newline at end of file diff --git a/include/boost/math/optimization/differential_evolution.hpp b/include/boost/math/optimization/differential_evolution.hpp new file mode 100644 index 0000000000..36ccfa781d --- /dev/null +++ b/include/boost/math/optimization/differential_evolution.hpp @@ -0,0 +1,231 @@ +/* + * Copyright Nick Thompson, 2024 + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + */ +#ifndef BOOST_MATH_OPTIMIZATION_DIFFERENTIAL_EVOLUTION_HPP +#define BOOST_MATH_OPTIMIZATION_DIFFERENTIAL_EVOLUTION_HPP +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace boost::math::optimization { + +// Storn, R., Price, K. (1997). Differential evolution-a simple and efficient heuristic for global optimization over +// continuous spaces. +// Journal of global optimization, 11, 341-359. 
+// See: +// https://www.cp.eng.chula.ac.th/~prabhas//teaching/ec/ec2012/storn_price_de.pdf + +// We provide the parameters in a struct-there are too many of them and they are too unwieldy to pass individually: +template struct differential_evolution_parameters { + using Real = typename ArgumentContainer::value_type; + ArgumentContainer lower_bounds; + ArgumentContainer upper_bounds; + // mutation factor is also called scale factor or just F in the literature: + Real mutation_factor = static_cast(0.65); + double crossover_probability = 0.5; + // Population in each generation: + size_t NP = 500; + size_t max_generations = 1000; + ArgumentContainer const *initial_guess = nullptr; + unsigned threads = std::thread::hardware_concurrency(); +}; + +template +void validate_differential_evolution_parameters(differential_evolution_parameters const &de_params) { + using std::isfinite; + using std::isnan; + std::ostringstream oss; + detail::validate_bounds(de_params.lower_bounds, de_params.upper_bounds); + if (de_params.NP < 4) { + oss << __FILE__ << ":" << __LINE__ << ":" << __func__; + oss << ": The population size must be at least 4, but requested population size of " << de_params.NP << "."; + throw std::invalid_argument(oss.str()); + } + // From: "Differential Evolution: A Practical Approach to Global Optimization (Natural Computing Series)" + // > The scale factor, F in (0,1+), is a positive real number that controls the rate at which the population evolves. + // > While there is no upper limit on F, effective values are seldom greater than 1.0. + // ... + // Also see "Limits on F", Section 2.5.1: + // > This discontinuity at F = 1 reduces the number of mutants by half and can result in erratic convergence... 
+ auto F = de_params.mutation_factor; + if (isnan(F) || F >= 1 || F <= 0) { + oss << __FILE__ << ":" << __LINE__ << ":" << __func__; + oss << ": F in (0, 1) is required, but got F=" << F << "."; + throw std::domain_error(oss.str()); + } + if (de_params.max_generations < 1) { + oss << __FILE__ << ":" << __LINE__ << ":" << __func__; + oss << ": There must be at least one generation."; + throw std::invalid_argument(oss.str()); + } + if (de_params.initial_guess) { + detail::validate_initial_guess(*de_params.initial_guess, de_params.lower_bounds, de_params.upper_bounds); + } + if (de_params.threads == 0) { + oss << __FILE__ << ":" << __LINE__ << ":" << __func__; + oss << ": There must be at least one thread."; + throw std::invalid_argument(oss.str()); + } +} + +template +ArgumentContainer differential_evolution( + const Func cost_function, differential_evolution_parameters const &de_params, URBG &gen, + std::invoke_result_t target_value = + std::numeric_limits>::quiet_NaN(), + std::atomic *cancellation = nullptr, + std::vector>> *queries = nullptr, + std::atomic> *current_minimum_cost = nullptr) { + using Real = typename ArgumentContainer::value_type; + using ResultType = std::invoke_result_t; + using std::clamp; + using std::isnan; + using std::round; + using std::uniform_real_distribution; + validate_differential_evolution_parameters(de_params); + const size_t dimension = de_params.lower_bounds.size(); + auto NP = de_params.NP; + auto population = detail::random_initial_population(de_params.lower_bounds, de_params.upper_bounds, NP, gen); + if (de_params.initial_guess) { + population[0] = *de_params.initial_guess; + } + std::vector cost(NP, std::numeric_limits::quiet_NaN()); + std::atomic target_attained = false; + // This mutex is only used if the queries are stored: + std::mutex mt; + + std::vector thread_pool; + auto const threads = de_params.threads; + for (size_t j = 0; j < threads; ++j) { + // Note that if some members of the population take way longer to 
compute, + // then this parallelization strategy is very suboptimal. + // However, we tried using std::async (which should be robust to this particular problem), + // but the overhead was just totally unacceptable on ARM Macs (the only platform tested). + // As the economists say "there are no solutions, only tradeoffs". + thread_pool.emplace_back([&, j]() { + for (size_t i = j; i < cost.size(); i += threads) { + cost[i] = cost_function(population[i]); + if (current_minimum_cost && cost[i] < *current_minimum_cost) { + *current_minimum_cost = cost[i]; + } + if (queries) { + std::scoped_lock lock(mt); + queries->push_back(std::make_pair(population[i], cost[i])); + } + if (!isnan(target_value) && cost[i] <= target_value) { + target_attained = true; + } + } + }); + } + for (auto &thread : thread_pool) { + thread.join(); + } + + std::vector trial_vectors(NP); + for (size_t i = 0; i < NP; ++i) { + if constexpr (detail::has_resize_v) { + trial_vectors[i].resize(dimension); + } + } + + for (size_t generation = 0; generation < de_params.max_generations; ++generation) { + if (cancellation && *cancellation) { + break; + } + if (target_attained) { + break; + } + + // You might be tempted to parallelize the generation of trial vectors. + // Here's the deal: Reproducibly generating random numbers is a nightmare. + // I first tried seeding thread-local random number generators with the global generator, + // but ThreadSanitizer didn't like it. I *suspect* there's a way around this, but + // even if it's formally threadsafe, there's still a lot of effort to get it computationally reproducible. 
+ uniform_real_distribution unif01(Real(0), Real(1)); + for (size_t i = 0; i < NP; ++i) { + size_t r1, r2, r3; + do { + r1 = gen() % NP; + } while (r1 == i); + do { + r2 = gen() % NP; + } while (r2 == i || r2 == r1); + do { + r3 = gen() % NP; + } while (r3 == i || r3 == r2 || r3 == r1); + for (size_t j = 0; j < dimension; ++j) { + // See equation (4) of the reference: + auto guaranteed_changed_idx = gen() % dimension; + if (unif01(gen) < de_params.crossover_probability || j == guaranteed_changed_idx) { + auto tmp = population[r1][j] + de_params.mutation_factor * (population[r2][j] - population[r3][j]); + auto const &lb = de_params.lower_bounds[j]; + auto const &ub = de_params.upper_bounds[j]; + // Some others recommend regenerating the indices rather than clamping; + // I dunno seems like it could get stuck regenerating . . . + trial_vectors[i][j] = clamp(tmp, lb, ub); + } else { + trial_vectors[i][j] = population[i][j]; + } + } + } + + thread_pool.resize(0); + for (size_t j = 0; j < threads; ++j) { + thread_pool.emplace_back([&, j]() { + for (size_t i = j; i < cost.size(); i += threads) { + if (target_attained) { + return; + } + if (cancellation && *cancellation) { + return; + } + auto const trial_cost = cost_function(trial_vectors[i]); + if (isnan(trial_cost)) { + continue; + } + if (queries) { + std::scoped_lock lock(mt); + queries->push_back(std::make_pair(trial_vectors[i], trial_cost)); + } + if (trial_cost < cost[i] || isnan(cost[i])) { + cost[i] = trial_cost; + if (!isnan(target_value) && cost[i] <= target_value) { + target_attained = true; + } + if (current_minimum_cost && cost[i] < *current_minimum_cost) { + *current_minimum_cost = cost[i]; + } + population[i] = trial_vectors[i]; + } + } + }); + } + for (auto &thread : thread_pool) { + thread.join(); + } + } + + auto it = std::min_element(cost.begin(), cost.end()); + return population[std::distance(cost.begin(), it)]; +} + +} // namespace boost::math::optimization +#endif diff --git 
a/include/boost/math/tools/differential_evolution.hpp b/include/boost/math/tools/differential_evolution.hpp deleted file mode 100644 index 61066c5c82..0000000000 --- a/include/boost/math/tools/differential_evolution.hpp +++ /dev/null @@ -1,329 +0,0 @@ -/* - * Copyright Nick Thompson, 2023 - * Use, modification and distribution are subject to the - * Boost Software License, Version 1.0. (See accompanying file - * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - */ -#ifndef BOOST_MATH_TOOLS_DIFFERENTIAL_EVOLUTION_HPP -#define BOOST_MATH_TOOLS_DIFFERENTIAL_EVOLUTION_HPP -#include -#include -#include -#include -#include -#if __has_include() -#include -#endif -#include -#include -#include -#include -#include -#include -#include - -namespace boost::math::tools { - -namespace detail { - -template struct has_resize : std::false_type {}; - -template -struct has_resize().resize(size_t{}))>> : std::true_type {}; - -template constexpr bool has_resize_v = has_resize::value; - -} // namespace detail - -// Storn, R., Price, K. (1997). Differential evolution-a simple and efficient heuristic for global optimization over -// continuous spaces. -// Journal of global optimization, 11, 341-359. 
-// See: -// https://www.cp.eng.chula.ac.th/~prabhas//teaching/ec/ec2012/storn_price_de.pdf - -// We provide the parameters in a struct-there are too many of them and they are too unwieldy to pass individually: -template struct differential_evolution_parameters { - using Real = typename ArgumentContainer::value_type; - ArgumentContainer lower_bounds; - ArgumentContainer upper_bounds; - Real F = static_cast(0.65); - double crossover_ratio = 0.5; - // Population in each generation: - size_t NP = 200; - - size_t max_generations = 1000; -#if defined(_OPENMP) - size_t threads = std::thread::hardware_concurrency(); -#else - size_t threads = 1; -#endif - ArgumentContainer const *initial_guess = nullptr; -}; - -template -void validate_differential_evolution_parameters(differential_evolution_parameters const &de_params) { - using std::isfinite; - using std::isnan; - std::ostringstream oss; - if (de_params.threads == 0) { - oss << __FILE__ << ":" << __LINE__ << ":" << __func__; - oss << ": Requested zero threads to perform the calculation, but at least 1 is required."; - throw std::invalid_argument(oss.str()); - } - if (de_params.lower_bounds.size() == 0) { - oss << __FILE__ << ":" << __LINE__ << ":" << __func__; - oss << ": The dimension of the problem cannot be zero."; - throw std::domain_error(oss.str()); - } - if (de_params.upper_bounds.size() != de_params.lower_bounds.size()) { - oss << __FILE__ << ":" << __LINE__ << ":" << __func__; - oss << ": There must be the same number of lower bounds as upper bounds, but given "; - oss << de_params.upper_bounds.size() << " upper bounds, and " << de_params.lower_bounds.size() << " lower bounds."; - throw std::domain_error(oss.str()); - } - for (size_t i = 0; i < de_params.lower_bounds.size(); ++i) { - auto lb = de_params.lower_bounds[i]; - auto ub = de_params.upper_bounds[i]; - if (lb > ub) { - oss << __FILE__ << ":" << __LINE__ << ":" << __func__; - oss << ": The upper bound must be greater than or equal to the lower bound, but 
the upper bound is " << ub - << " and the lower is " << lb << "."; - throw std::domain_error(oss.str()); - } - if (!isfinite(lb)) { - oss << __FILE__ << ":" << __LINE__ << ":" << __func__; - oss << ": The lower bound must be finite, but got " << lb << "."; - oss << " For infinite bounds, emulate with std::numeric_limits::lower() or use a standard infinite->finite " - "transform."; - throw std::domain_error(oss.str()); - } - if (!isfinite(ub)) { - oss << __FILE__ << ":" << __LINE__ << ":" << __func__; - oss << ": The upper bound must be finite, but got " << ub << "."; - oss << " For infinite bounds, emulate with std::numeric_limits::max() or use a standard infinite->finite " - "transform."; - throw std::domain_error(oss.str()); - } - } - if (de_params.NP < 4) { - oss << __FILE__ << ":" << __LINE__ << ":" << __func__; - oss << ": The population size must be at least 4, but requested population size of " << de_params.NP << "."; - throw std::invalid_argument(oss.str()); - } - if (de_params.threads > de_params.NP) { - oss << __FILE__ << ":" << __LINE__ << ":" << __func__; - oss << ": There must be more individuals in the population than threads."; - throw std::invalid_argument(oss.str()); - } - // From: "Differential Evolution: A Practical Approach to Global Optimization (Natural Computing Series)" - // > The scale factor, F in (0,1+), is a positive real number that controls the rate at which the population evolves. - // > While there is no upper limit on F, effective values are seldom greater than 1.0. - // ... - // Also see "Limits on F", Section 2.5.1: - // > This discontinuity at F = 1 reduces the number of mutants by half and can result in erratic convergence... 
- auto F = de_params.F; - if (isnan(F) || F >= 1 || F <= 0) { - oss << __FILE__ << ":" << __LINE__ << ":" << __func__; - oss << ": F in (0, 1) is required, but got F=" << F << "."; - throw std::domain_error(oss.str()); - } - if (de_params.max_generations < 1) { - oss << __FILE__ << ":" << __LINE__ << ":" << __func__; - oss << ": There must be at least one generation."; - throw std::invalid_argument(oss.str()); - } - if (de_params.initial_guess) { - auto dimension = de_params.lower_bounds.size(); - if (de_params.initial_guess->size() != dimension) { - oss << __FILE__ << ":" << __LINE__ << ":" << __func__; - oss << ": The initial guess must have the same dimensions as the problem,"; - oss << ", but the problem size is " << dimension << " and the initial guess has " - << de_params.initial_guess->size() << " elements."; - throw std::domain_error(oss.str()); - } - auto const &guess = *de_params.initial_guess; - for (size_t i = 0; i < dimension; ++i) { - auto lb = de_params.lower_bounds[i]; - auto ub = de_params.upper_bounds[i]; - if (guess[i] < lb || guess[i] > ub) { - oss << __FILE__ << ":" << __LINE__ << ":" << __func__; - oss << ": At index " << i << " the initial guess " << guess[i] << " is not in the bounds [" << lb << ", " << ub - << "]."; - throw std::domain_error(oss.str()); - } - } - } -#if !defined(_OPENMP) - if (de_params.threads != 1) { - oss << __FILE__ << ":" << __LINE__ << ":" << __func__; - oss << ": If OpenMP is not available, then there algorithm must run on a single thread, but requested " - << de_params.threads << " threads."; - throw std::invalid_argument(oss.str()); - } -#endif -} - -template -ArgumentContainer differential_evolution( - const Func cost_function, differential_evolution_parameters const &de_params, URBG &g, - std::invoke_result_t target_value = - std::numeric_limits>::quiet_NaN(), - std::atomic *cancellation = nullptr, - std::vector>> *queries = nullptr, - std::atomic> *current_minimum_cost = nullptr) { - using Real = typename 
ArgumentContainer::value_type; - validate_differential_evolution_parameters(de_params); - constexpr bool has_resize = detail::has_resize_v; - - using ResultType = std::invoke_result_t; - using std::clamp; - using std::isnan; - using std::round; - auto const NP = de_params.NP; - std::vector population(NP); - auto const dimension = de_params.lower_bounds.size(); - for (size_t i = 0; i < population.size(); ++i) { - if constexpr (has_resize) { - population[i].resize(dimension); - } else { - // Argument type must be known at compile-time; like std::array: - if (population[i].size() != dimension) { - std::ostringstream oss; - oss << __FILE__ << ":" << __LINE__ << ":" << __func__; - oss << ": For containers which do not have resize, the default size must be the same as the dimension, "; - oss << "but the default container size is " << population[i].size() << " and the dimension of the problem is " - << dimension << "."; - oss << " The function argument container type is " << typeid(ArgumentContainer).name() << ".\n"; - throw std::runtime_error(oss.str()); - } - } - } - // Why don't we provide an option to initialize with (say) a Gaussian distribution? - // > If the optimum's location is fairly well known, - // > a Gaussian distribution may prove somewhat faster, although it - // > may also increase the probability that the population will converge prematurely. - // > In general, uniform distributions are preferred, since they best reflect - // > the lack of knowledge about the optimum's location. - // - Differential Evolution: A Practical Approach to Global Optimization - // That said, scipy uses Latin Hypercube sampling and says self-avoiding sequences are preferable. - // So this is something that could be investigated and potentially improved. 
- using std::uniform_real_distribution; - uniform_real_distribution dis(Real(0), Real(1)); - for (size_t i = 0; i < population.size(); ++i) { - for (size_t j = 0; j < dimension; ++j) { - auto const &lb = de_params.lower_bounds[j]; - auto const &ub = de_params.upper_bounds[j]; - population[i][j] = lb + dis(g) * (ub - lb); - } - } - if (de_params.initial_guess) { - population[0] = *de_params.initial_guess; - } - - std::atomic target_attained = false; - std::vector cost(NP, std::numeric_limits::quiet_NaN()); -#if defined(_OPENMP) -#pragma omp parallel for num_threads(de_params.threads) -#endif - for (size_t i = 0; i < cost.size(); ++i) { - cost[i] = cost_function(population[i]); - if (!isnan(target_value) && cost[i] <= target_value) { - target_attained = true; - } - if (current_minimum_cost && cost[i] < *current_minimum_cost) { - *current_minimum_cost = cost[i]; - } - if (queries) { -#if defined(_OPENMP) // get rid of -Wunknown-pragmas when OpenMP is not available: -#pragma omp critical -#endif - queries->push_back(std::make_pair(population[i], cost[i])); - } - } - - // This probably won't show up on any performance metrics, but just convert everything to integer ops: - const auto crossover_int = - static_cast(round(static_cast((URBG::max)() - (URBG::min)()) * de_params.crossover_ratio)); - std::vector generators(de_params.threads); - for (size_t i = 0; i < de_params.threads; ++i) { - generators[i].seed(g()); - } - for (size_t generation = 0; generation < de_params.max_generations; ++generation) { - if (cancellation && *cancellation) { - break; - } - if (target_attained) { - break; - } -#if defined(_OPENMP) -#pragma omp parallel for num_threads(de_params.threads) -#endif - for (size_t i = 0; i < NP; ++i) { -#if defined(_OPENMP) - size_t thread_idx = omp_get_thread_num(); -#else - size_t thread_idx = 0; -#endif - auto &gen = generators[thread_idx]; - size_t r1, r2, r3; - do { - r1 = gen() % NP; - } while (r1 == i); - do { - r2 = gen() % NP; - } while (r2 == i || r2 == 
r1); - do { - r3 = gen() % NP; - } while (r3 == i || r3 == r2 || r3 == r1); - // Hopefully the compiler optimizes this so that it's not allocated on every iteration: - ArgumentContainer trial_vector; - if constexpr (has_resize) { - trial_vector.resize(dimension); - } - for (size_t j = 0; j < dimension; ++j) { - // See equation (4) of the reference: - auto guaranteed_changed_idx = gen() % NP; - if (gen() < crossover_int || j == guaranteed_changed_idx) { - auto tmp = population[r1][j] + de_params.F * (population[r2][j] - population[r3][j]); - auto const &lb = de_params.lower_bounds[j]; - auto const &ub = de_params.upper_bounds[j]; - // Some others recommend regenerating the indices rather than clamping; - // I dunno seems like it could get stuck regenerating . . . - trial_vector[j] = clamp(tmp, lb, ub); - } else { - trial_vector[j] = population[i][j]; - } - } - auto trial_cost = cost_function(trial_vector); - if (queries) { -#if defined(_OPENMP) -#pragma omp critical -#endif - queries->push_back(std::make_pair(trial_vector, trial_cost)); - } - - if (isnan(trial_cost)) { - continue; - } - if (trial_cost < cost[i] || isnan(cost[i])) { - std::swap(population[i], trial_vector); - cost[i] = trial_cost; - if (!isnan(target_value) && cost[i] <= target_value) { - target_attained = true; - // In a single-threaded context, I'd put a break statement here, - // but OpenMP does not allow break statements in for loops. - // We'll just have to wait until the end of this generation. 
- } - if (current_minimum_cost && cost[i] < *current_minimum_cost) { - *current_minimum_cost = cost[i]; - } - } - } - } - - auto it = std::min_element(cost.begin(), cost.end()); - return population[std::distance(cost.begin(), it)]; -} - -} // namespace boost::math::tools -#endif diff --git a/test/differential_evolution_test.cpp b/test/differential_evolution_test.cpp index ffa54c0925..1267568015 100644 --- a/test/differential_evolution_test.cpp +++ b/test/differential_evolution_test.cpp @@ -6,29 +6,15 @@ */ #include "math_unit_test.hpp" -#include -#include +#include "test_functions_for_optimization.hpp" +#include #include -using boost::math::constants::e; -using boost::math::constants::two_pi; -using boost::math::tools::differential_evolution; -using boost::math::tools::differential_evolution_parameters; -using std::cbrt; -using std::cos; -using std::exp; -using std::sqrt; - -// Taken from: https://en.wikipedia.org/wiki/Test_functions_for_optimization -template Real ackley(std::array const &v) { - Real x = v[0]; - Real y = v[1]; - Real arg1 = -sqrt((x * x + y * y) / 2) / 5; - Real arg2 = cos(two_pi() * x) + cos(two_pi() * y); - return -20 * exp(arg1) - exp(arg2 / 2) + 20 + e(); -} +using boost::math::optimization::differential_evolution; +using boost::math::optimization::differential_evolution_parameters; template void test_ackley() { + std::cout << "Testing differential evolution on the Ackley function . . .\n"; using ArgType = std::array; auto de_params = differential_evolution_parameters(); de_params.lower_bounds = {-5, -5}; @@ -53,13 +39,8 @@ template void test_ackley() { CHECK_EQUAL(local_minima[1], Real(0)); } -template auto rosenbrock_saddle(std::array const &v) { - auto x = v[0]; - auto y = v[1]; - return 100 * (x * x - y) * (x * x - y) + (1 - x) * (1 - x); -} - template void test_rosenbrock_saddle() { + std::cout << "Testing differential evolution on the Rosenbrock saddle . . 
.\n"; using ArgType = std::array; auto de_params = differential_evolution_parameters(); de_params.lower_bounds = {0.5, 0.5}; @@ -78,16 +59,8 @@ template void test_rosenbrock_saddle() { CHECK_GE(std::abs(local_minima[0] - Real(1)), std::sqrt(std::numeric_limits::epsilon())); } -template Real rastrigin(std::vector const &v) { - Real A = 10; - Real y = 10 * v.size(); - for (auto x : v) { - y += x * x - A * cos(two_pi() * x); - } - return y; -} - template void test_rastrigin() { + std::cout << "Testing differential evolution on the Rastrigin function . . .\n"; using ArgType = std::vector; auto de_params = differential_evolution_parameters(); de_params.lower_bounds.resize(8, static_cast(-5.12)); @@ -104,43 +77,69 @@ template void test_rastrigin() { CHECK_LE(rastrigin(local_minima), target_value); } -double sphere(std::vector const &v) { - double r = 0.0; - for (auto x : v) { - double x_ = static_cast(x); - r += x_ * x_; - } - if (r >= 1) { - return std::numeric_limits::quiet_NaN(); - } - return r; -} - // Tests NaN return types and return type != input type: void test_sphere() { + std::cout << "Testing differential evolution on the sphere function . . 
.\n"; using ArgType = std::vector; auto de_params = differential_evolution_parameters(); - de_params.lower_bounds.resize(8, -1); - de_params.upper_bounds.resize(8, 1); + de_params.lower_bounds.resize(3, -1); + de_params.upper_bounds.resize(3, 1); de_params.NP *= 10; de_params.max_generations *= 10; + de_params.crossover_probability = 0.9; + double target_value = 1e-8; + de_params.threads = 1; std::mt19937_64 gen(56789); - auto local_minima = differential_evolution(sphere, de_params, gen); + auto local_minima = differential_evolution(sphere, de_params, gen, target_value); + CHECK_LE(sphere(local_minima), target_value); + // Check computational reproducibility: + gen.seed(56789); + auto local_minima_2 = differential_evolution(sphere, de_params, gen, target_value); + for (size_t i = 0; i < local_minima.size(); ++i) { + CHECK_EQUAL(local_minima[i], local_minima_2[i]); + } +} + +template +void test_three_hump_camel() { + std::cout << "Testing differential evolution on the three hump camel . . .\n"; + using ArgType = std::array; + auto de_params = differential_evolution_parameters(); + de_params.lower_bounds[0] = -5.0; + de_params.lower_bounds[1] = -5.0; + de_params.upper_bounds[0] = 5.0; + de_params.upper_bounds[1] = 5.0; + std::mt19937_64 gen(56789); + auto local_minima = differential_evolution(three_hump_camel, de_params, gen); for (auto x : local_minima) { CHECK_ABSOLUTE_ERROR(0.0f, x, 2e-4f); } } -#define GCC_COMPILER (defined(__GNUC__) && !defined(__clang__)) +template +void test_beale() { + std::cout << "Testing differential evolution on the Beale function . . 
.\n"; + using ArgType = std::array; + auto de_params = differential_evolution_parameters(); + de_params.lower_bounds[0] = -5.0; + de_params.lower_bounds[1] = -5.0; + de_params.upper_bounds[0]= 5.0; + de_params.upper_bounds[1]= 5.0; + std::mt19937_64 gen(56789); + auto local_minima = differential_evolution(beale, de_params, gen); + CHECK_ABSOLUTE_ERROR(Real(3), local_minima[0], Real(2e-4)); + CHECK_ABSOLUTE_ERROR(Real(1)/Real(2), local_minima[1], Real(2e-4)); +} int main() { - // GCC<=8 rejects the function call syntax we use here. - // Just do a workaround: -#if !defined(GCC_COMPILER) || (defined(GCC_COMPILER) && __GNUC__ >= 9) + +#if defined(__clang__) || defined(_MSC_VER) test_ackley(); test_ackley(); test_rosenbrock_saddle(); test_rastrigin(); + test_three_hump_camel(); + test_beale(); #endif test_sphere(); return boost::math::test::report_errors(); diff --git a/test/test_functions_for_optimization.hpp b/test/test_functions_for_optimization.hpp new file mode 100644 index 0000000000..46b9779af1 --- /dev/null +++ b/test/test_functions_for_optimization.hpp @@ -0,0 +1,77 @@ +/* + * Copyright Nick Thompson, 2024 + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. 
(See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + */ +#ifndef TEST_FUNCTIONS_FOR_OPTIMIZATION_HPP +#define TEST_FUNCTIONS_FOR_OPTIMIZATION_HPP +#include + +// Taken from: https://en.wikipedia.org/wiki/Test_functions_for_optimization +template Real ackley(std::array const &v) { + using std::sqrt; + using std::cos; + using std::exp; + using boost::math::constants::two_pi; + using boost::math::constants::e; + Real x = v[0]; + Real y = v[1]; + Real arg1 = -sqrt((x * x + y * y) / 2) / 5; + Real arg2 = cos(two_pi() * x) + cos(two_pi() * y); + return -20 * exp(arg1) - exp(arg2 / 2) + 20 + e(); +} + +template auto rosenbrock_saddle(std::array const &v) { + auto x = v[0]; + auto y = v[1]; + return 100 * (x * x - y) * (x * x - y) + (1 - x) * (1 - x); +} + + +template Real rastrigin(std::vector const &v) { + using std::cos; + using boost::math::constants::two_pi; + Real A = 10; + Real y = 10 * v.size(); + for (auto x : v) { + y += x * x - A * cos(two_pi() * x); + } + return y; +} + +// Useful for testing return-type != scalar argument type, +// and robustness to NaNs: +double sphere(std::vector const &v) { + double r = 0.0; + for (auto x : v) { + double x_ = static_cast(x); + r += x_ * x_; + } + if (r >= 1) { + return std::numeric_limits::quiet_NaN(); + } + return r; +} + +template +Real three_hump_camel(std::array const & v) { + Real x = v[0]; + Real y = v[1]; + auto xsq = x*x; + return 2*xsq - (1 + Real(1)/Real(20))*xsq*xsq + xsq*xsq*xsq/6 + x*y + y*y; +} + +// Minima occurs at (3, 1/2) with value 0: +template +Real beale(std::array const & v) { + Real x = v[0]; + Real y = v[1]; + Real t1 = Real(3)/Real(2) -x + x*y; + Real t2 = Real(9)/Real(4) -x + x*y*y; + Real t3 = Real(21)/Real(8) -x + x*y*y*y; + return t1*t1 + t2*t2 + t3*t3; +} + + +#endif