Skip to content

Commit

Permalink
Add Branch.elementSize()
Browse files Browse the repository at this point in the history
Capture the type dispatch in CNode.elementSize() into a
Branch.elementSize(), which is then implemented by the two types.

As a consequence, the type safety of MainNode.size() is improved as
well.

Signed-off-by: Robert Varga <[email protected]>
  • Loading branch information
rovarga committed Jan 25, 2025
1 parent 7ff546a commit 0076327
Show file tree
Hide file tree
Showing 8 changed files with 63 additions and 34 deletions.
8 changes: 7 additions & 1 deletion triemap/src/main/java/tech/pantheon/triemap/Branch.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,5 +19,11 @@
* A Branch: either an {@link INode} or an {@link SNode}.
*/
sealed interface Branch<K, V> permits INode, SNode {
// Nothing else
/**
* Return the number of entries for the purposes of {@link CNode#size(ImmutableTrieMap)}.
*
* @param ct TrieMap reference
* @return The actual number of entries
*/
int elementSize(ImmutableTrieMap<K, V> ct);
}
70 changes: 44 additions & 26 deletions triemap/src/main/java/tech/pantheon/triemap/CNode.java
Original file line number Diff line number Diff line change
Expand Up @@ -284,43 +284,61 @@ int trySize() {
}

@Override
int size(final ImmutableTrieMap<?, ?> ct) {
int size(final ImmutableTrieMap<K, V> ct) {
int sz;
return (sz = csize) != NO_SIZE ? sz : (csize = computeSize(ct));
}

// lends itself towards being parallelizable by choosing
// a random starting offset in the array
// => if there are concurrent size computations, they start
// at different positions, so they are more likely
// to be independent
private int computeSize(final ImmutableTrieMap<?, ?> ct) {
private int computeSize(final ImmutableTrieMap<K, V> ct) {
final int len = array.length;
return switch (len) {
case 0 -> 0;
case 1 -> elementSize(ct, array[0]);
default -> {
final int offset = ThreadLocalRandom.current().nextInt(len);
int sz = 0;
for (int i = offset; i < len; ++i) {
sz += elementSize(ct, array[i]);
}
for (int i = 0; i < offset; ++i) {
sz += elementSize(ct, array[i]);
}
yield sz;
}
case 1 -> array[0].elementSize(ct);
default -> computeSize(ct, array, len);
};
}

private static int elementSize(final ImmutableTrieMap<?, ?> ct, final Branch<?, ?> elem) {
if (elem instanceof SNode) {
return 1;
} else if (elem instanceof INode<?, ?> inode) {
return inode.readSize(ct);
} else {
throw invalidElement(elem);
// Sums elementSize(ct) over every element of the array, visiting each element exactly once.
//
// Lends itself towards being parallelizable by choosing a random starting offset in the array: if there are
// concurrent size computations, they start at different positions, so they are more likely to be independent
private static <K, V> int computeSize(final ImmutableTrieMap<K, V> ct, final Branch<K, V>[] array, final int len) {
// TODO: The other side of this argument is that array is 2-32 items long, i.e. on OpenJDK 21 on x64 the array
// ends up being 16 + (2-32) * (4/8) == 24-144 / 32-272 bytes each.
//
// When traversing we do not dereference SNodes, but each INode either returns a cached value or goes off
// and branches (via a 16-byte object) to (eventually) this code in some other CNode. We also know
// we have at least 2 entries to traverse.
//
// Taking into consideration a modern CPU, with:
// - 12 physical cores: 4 P-cores (2 threads each), 8 E-cores (1 thread each)
// - 64 byte cache line size
// - L1d
// - 48KiB L1d per P-core
// - 32KiB L1d per E-core
// - L2 unified
// - 1.25MiB per P-core
// - 2MiB for each 4 E-cores
// - L3 unified 12MiB
// it would seem that all things being optimal, each thread is using 24-32KiB L1d, 512-1024KiB L2 and
// about 768KiB of L3.
//
// So three things:
// 0) We really would like to prevent L1d bounces, so threads on different cores should be touching
// different cachelines. We are looking at traversing 3-5 linear cache lines.
// 1) Would it make sense to inline the loops below, for example by counting odds and evens into
// separate variables, striding by 2 and then combining the two counters?
// 2) On the other hand, doesn't JIT already take care of this? Is there something we can do better,
// like making sure the starting offset is aligned just by taking less random entropy?
//
// Note: len >= 2 is enforced by the sole caller
final int offset = ThreadLocalRandom.current().nextInt(len);
int sz = 0;
// First pass: from the random starting offset to the end of the array
for (int i = offset; i < len; ++i) {
sz += array[i].elementSize(ct);
}
// Second pass: wrap around and cover the elements before the starting offset
for (int i = 0; i < offset; ++i) {
sz += array[i].elementSize(ct);
}
return sz;
}

private CNode<K, V> updatedAt(final int pos, final Branch<K, V> nn, final Gen ngen) {
Expand Down
3 changes: 2 additions & 1 deletion triemap/src/main/java/tech/pantheon/triemap/INode.java
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,8 @@ INode<K, V> copyToGen(final TrieMap<K, V> ct, final Gen ngen) {
return new INode<>(ngen, gcasRead(ct));
}

int readSize(final ImmutableTrieMap<?, ?> ct) {
// Branch.elementSize() for an INode: reads the node via gcasReadNonNull(ct) and reports that node's size(ct).
@Override
public int elementSize(final ImmutableTrieMap<K, V> ct) {
return gcasReadNonNull(ct).size(ct);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ public V replace(final K key, final V value) {

@Override
public int size() {
return root.readSize(this);
return root.elementSize(this);
}

@Override
Expand Down
2 changes: 1 addition & 1 deletion triemap/src/main/java/tech/pantheon/triemap/LNode.java
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ int trySize() {
}

@Override
int size(final ImmutableTrieMap<?, ?> ct) {
int size(final ImmutableTrieMap<K, V> ct) {
return size;
}
}
5 changes: 2 additions & 3 deletions triemap/src/main/java/tech/pantheon/triemap/MainNode.java
Original file line number Diff line number Diff line change
Expand Up @@ -50,11 +50,10 @@ abstract sealed class MainNode<K, V> extends INode.TryGcas<K, V> permits CNode,
abstract int trySize();

/**
* Return the number of entries in this node, traversing it if need be. This method should be invoked only
* on immutable snapshots.
* Return the number of entries in this node, traversing it if need be.
*
* @param ct TrieMap reference
* @return The actual number of entries.
*/
abstract int size(ImmutableTrieMap<?, ?> ct);
abstract int size(ImmutableTrieMap<K, V> ct);
}
5 changes: 5 additions & 0 deletions triemap/src/main/java/tech/pantheon/triemap/SNode.java
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,11 @@ boolean matches(final int otherHc, final Object otherKey) {
return new Result<>(value);
}

// Branch.elementSize() for an SNode: always counts as exactly one entry, the ct argument is not consulted.
@Override
public int elementSize(final ImmutableTrieMap<K, V> ct) {
return 1;
}

@Override
public int hashCode() {
return AbstractEntry.hashCode(key, value);
Expand Down
2 changes: 1 addition & 1 deletion triemap/src/main/java/tech/pantheon/triemap/TNode.java
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ int trySize() {
}

@Override
int size(final ImmutableTrieMap<?, ?> ct) {
int size(final ImmutableTrieMap<K, V> ct) {
return 1;
}

Expand Down

0 comments on commit 0076327

Please sign in to comment.