Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CP-53335, topology: do not raise exception when loading invalid distance matrices (NUMA) #6249

Merged
merged 9 commits into from
Feb 3, 2025
189 changes: 118 additions & 71 deletions ocaml/xenopsd/lib/topology.ml
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,8 @@

module D = Debug.Make (struct let name = "topology" end)

open D

module CPUSet = struct
include Set.Make (struct
type t = int

let compare (x : int) (y : int) = compare x y
end)
include Set.Make (Int)

let pp_dump = Fmt.using to_seq Fmt.(Dump.seq int)

Expand Down Expand Up @@ -95,27 +89,32 @@ let seq_range a b =
let rec next i () = if i = b then Seq.Nil else Seq.Cons (i, next (i + 1)) in
next a

(** [gen_2n n] Generates all non-empty subsets of the set of [n] nodes. *)
let seq_gen_2n n =
let seq_filteri p s =
let rec loop i s () =
edwintorok marked this conversation as resolved.
Show resolved Hide resolved
match s () with
| Seq.Nil ->
Seq.Nil
| Cons (hd, s) ->
if p i hd then
Cons (hd, loop (i + 1) s)
else
loop (i + 1) s ()
in
loop 0 s

(** [seq_all_subsets n] Generates all non-empty subsets of the [nodes] set. *)
let seq_all_subsets nodes =
let n = Seq.length nodes in
(* A node can either be present in the output or not, so use a loop [1, 2^n)
and have the [i]th bit determine that. *)
let of_mask i =
seq_range 0 n |> Seq.filter (fun bit -> (i lsr bit) land 1 = 1)
in
let of_mask i = nodes |> seq_filteri (fun bit _ -> (i lsr bit) land 1 = 1) in
seq_range 1 (1 lsl n) |> Seq.map of_mask

(** [seq_sort ~cmp s] sorts [s] in a temporary place using [cmp]. *)
let seq_sort ~cmp s =
let a = Array.of_seq s in
Array.fast_sort cmp a ; Array.to_seq a

(** [seq_append a b] is the sequence [a] followed by [b] *)
let seq_append (a : 'a Seq.t) (b : 'a Seq.t) =
let rec next v () =
match v () with Seq.Nil -> b () | Seq.Cons (x, xs) -> Seq.Cons (x, next xs)
in
next a

module NUMA = struct
type node = Node of int

Expand All @@ -125,31 +124,43 @@ module NUMA = struct
let compare (Node a) (Node b) = compare a b
end)

(* -1 in 32 bits *)
let unreachable_distance = 0xFFFFFFFF

let self_distance = 10

(* no mutation is exposed in the interface, therefore this is immutable *)
type t = {
distances: int array array
; cpu_to_node: node array
; node_cpus: CPUSet.t array
; all: CPUSet.t
; node_usage: int array
(** Usage across nodes is meant to be balanced when choosing candidates for a VM *)
; candidates: (float * node Seq.t) Seq.t
(** Sequence of all subsets of nodes and the average distance within
the subset, sorted by the latter in increasing order. *)
}

let node_of_int i = Node i

let node_distances d nodes =
let dists =
nodes |> Seq.flat_map (fun n1 -> nodes |> Seq.map (fun n2 -> d.(n1).(n2)))
in
let count, max_dist, sum_dist =
Seq.fold_left
(fun (count, maxv, sum) e -> (count + 1, max maxv e, sum + e))
(0, min_int, 0) dists
in
(* We want to minimize maximum distance first, and average distance next.
When running the VM we don't know which pCPU it'll end up using, and want
to limit the worst case performance. *)
((max_dist, float sum_dist /. float count), nodes)
if Seq.is_empty nodes then
None
psafont marked this conversation as resolved.
Show resolved Hide resolved
else
let dists =
nodes
|> Seq.flat_map (fun n1 -> nodes |> Seq.map (fun n2 -> d.(n1).(n2)))
in
let count, max_dist, sum_dist =
Seq.fold_left
(fun (count, maxv, sum) e -> (count + 1, max maxv e, sum + e))
(0, min_int, 0) dists
in
(* We want to minimize maximum distance first, and average distance next.
When running the VM we don't know which pCPU it'll end up using, and want
to limit the worst case performance. *)
Some ((max_dist, float sum_dist /. float count), nodes)

let dist_cmp (a1, _) (b1, _) = compare a1 b1

Expand All @@ -159,60 +170,96 @@ module NUMA = struct
[n*multiply ... n*multiply + multiply-1], except we always the add the
single NUMA node combinations. *)
(* make sure that single NUMA nodes are always present in the combinations *)
let single_nodes =
let distance_to_candidate d = (d, float d) in
let valid_nodes =
seq_range 0 (Array.length d)
|> Seq.map (fun i -> ((10, 10.0), Seq.return i))
|> Seq.filter_map (fun i ->
let self = d.(i).(i) in
if self <> unreachable_distance then
Some i
else
None
)
in
let numa_nodes = Array.length d in
let numa_nodes = Seq.length valid_nodes in
let nodes =
if numa_nodes > 16 then
(* try just the single nodes, and give up (use all nodes otherwise) to
avoid exponential running time. We could do better here, e.g. by
if numa_nodes > 16 then (
(* Avoid generating too many candidates because of the exponential
running time. We could do better here, e.g. by
reducing the matrix *)
single_nodes
else
numa_nodes
|> seq_gen_2n
|> Seq.map (node_distances d)
|> seq_append single_nodes
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the single nodes were always appended just in case something goes wrong with the filtering algorithm (which was meant to be somewhat smarter than brute force, or it may evolve in the future to be somewhat smarter).

As the algorithm currently looks like I agree that we don't need to append the single nodes here. Perhaps this would be a good condition to test for in the testcases, that single (reachable) nodes are always present with the expected value (unless such a test already exists).

D.info
"%s: More than 16 valid NUMA nodes detected, considering only \
individual nodes."
__FUNCTION__ ;
valid_nodes
|> Seq.map (fun i ->
let self = d.(i).(i) in
(distance_to_candidate self, Seq.return i)
)
) else
valid_nodes |> seq_all_subsets |> Seq.filter_map (node_distances d)
in
nodes
|> seq_sort ~cmp:dist_cmp
|> Seq.map (fun ((_, avg), nodes) -> (avg, Seq.map (fun n -> Node n) nodes))

let pp_dump_distances = Fmt.(int |> Dump.array |> Dump.array)

let make ~distances ~cpu_to_node =
debug "Distances: %s" (Fmt.to_to_string pp_dump_distances distances) ;
debug "CPU2Node: %s" (Fmt.to_to_string Fmt.(Dump.array int) cpu_to_node) ;
let ( let* ) = Option.bind in
let node_cpus = Array.map (fun _ -> CPUSet.empty) distances in

(* nothing can be scheduled on unreachable nodes, remove them from the
node_cpus *)
Array.iteri
(fun i node -> node_cpus.(node) <- CPUSet.add i node_cpus.(node))
cpu_to_node ;
Array.iteri
(fun i row ->
let d = distances.(i).(i) in
if d <> 10 then
invalid_arg
(Printf.sprintf "NUMA distance from node to itself must be 10: %d" d) ;
Array.iteri
(fun _ d ->
if d < 10 then
invalid_arg (Printf.sprintf "NUMA distance must be >= 10: %d" d)
)
row
(fun i node ->
let self = distances.(node).(node) in
if self <> unreachable_distance then
node_cpus.(node) <- CPUSet.add i node_cpus.(node)
)
distances ;
let all = Array.fold_left CPUSet.union CPUSet.empty node_cpus in
let candidates = gen_candidates distances in
{
cpu_to_node ;

let* () =
if Array.for_all (fun cpus -> CPUSet.is_empty cpus) node_cpus then (
D.info
"Not enabling NUMA: the ACPI SLIT only contains unreachable nodes." ;
None
) else
Some ()
in

let numa_matrix_is_reasonable =
distances
; cpu_to_node= Array.map node_of_int cpu_to_node
; node_cpus
; all
; node_usage= Array.map (fun _ -> 0) distances
; candidates
}
|> Array.to_seqi
|> Seq.for_all (fun (i, row) ->
let d = distances.(i).(i) in
(d = unreachable_distance || d = self_distance)
&& Array.for_all
(fun d -> d >= self_distance || d = unreachable_distance)
row
)
in

let* () =
if not numa_matrix_is_reasonable then (
D.info
"Not enabling NUMA: the ACPI SLIT table contains values that are \
invalid." ;
None
) else
Some ()
in

let candidates = gen_candidates distances in

let all = Array.fold_left CPUSet.union CPUSet.empty node_cpus in
Some
{
distances
; cpu_to_node= Array.map node_of_int cpu_to_node
; node_cpus
; all
; node_usage= Array.map (fun _ -> 0) distances
; candidates
}

let distance t (Node a) (Node b) = t.distances.(a).(b)

Expand Down
4 changes: 1 addition & 3 deletions ocaml/xenopsd/lib/topology.mli
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ module NUMA : sig
(** A NUMA node index. Distinct from an int to avoid mixing with CPU numbers *)
type node = private Node of int

val make : distances:int array array -> cpu_to_node:int array -> t
val make : distances:int array array -> cpu_to_node:int array -> t option
(** [make distances cpu_to_node] stores the topology. [distances] is a square
matrix [d] where [d.(i).(j)] is an approximation to how much slower it is
to access memory from node [j] when running on node [i]. Distances are
Expand All @@ -97,8 +97,6 @@ module NUMA : sig

NUMA nodes without any CPUs are accepted (to handle hard affinities).

Raises [invalid_arg] if above constraints are not met

A typical matrix might look like this:

10 21
Expand Down
20 changes: 15 additions & 5 deletions ocaml/xenopsd/test/dune
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
(test
(name test)
(modes exe)
(modules :standard \ test_cpuid test_topology)
(package xapi-tools)
(libraries
alcotest
cpuid

fmt
result
rpclib.core
Expand All @@ -15,15 +13,27 @@
xapi-idl.xen.interface.types
xapi-log
xapi-stdext-pervasives
xapi-test-utils
xapi_xenopsd
xenstore_transport.unix
)
(preprocess
(per_module ((pps ppx_deriving_rpc) Test))
)
)

(test
(name test_cpuid)
(modules test_cpuid)
(package xapi-tools)
(libraries alcotest cpuid xapi-test-utils xapi_xenopsd)
)

(test
(name test_topology)
(modules test_topology)
(package xapi-tools)
(libraries alcotest fmt xapi-log xapi_xenopsd)
)

(rule
(alias runtest)
(package xapi-tools)
Expand Down
2 changes: 1 addition & 1 deletion ocaml/xenopsd/test/test.ml
Original file line number Diff line number Diff line change
Expand Up @@ -1032,4 +1032,4 @@ let _ =
)
in
Debug.log_to_stdout () ;
Alcotest.run "xenops test" ([suite; Test_topology.suite] @ Test_cpuid.tests)
Alcotest.run "xenops test" [suite]
2 changes: 2 additions & 0 deletions ocaml/xenopsd/test/test_cpuid.ml
Original file line number Diff line number Diff line change
Expand Up @@ -327,3 +327,5 @@ let tests =
; ("equality", Equality.tests)
; ("comparisons", Comparisons.tests)
]

let () = Alcotest.run "CPUID" tests
Loading
Loading