Skip to content

Commit

Permalink
Improve block find performance (llvm#412)
Browse files Browse the repository at this point in the history
  • Loading branch information
b-sumner authored Feb 3, 2025
1 parent af88610 commit e4b15ce
Showing 1 changed file with 17 additions and 25 deletions.
42 changes: 17 additions & 25 deletions amd/device-libs/ockl/src/dm.cl
Original file line number Diff line number Diff line change
Expand Up @@ -51,26 +51,25 @@ struct kind_info_s {
uint first_unusable;
uint gap_unusable;
uint pattern_unusable;
uint spread_factor;
};

static const __constant struct kind_info_s kinfo[NUM_KINDS] = {
{ /* 0: 16 */ 130054, 129546, 110114, 16288, 6, 256, 0x00000000, 4195 },
{ /* 1: 24 */ 86927, 86758, 73744, 10904, 399, 512, 0x00000000, 2804 },
{ /* 2: 32 */ 65280, 64770, 55054, 8192, 0, 128, 0x00000000, 2107 },
{ /* 3: 48 */ 43576, 43406, 36895, 5504, 56, 256, 0x00000000, 1405 },
{ /* 4: 64 */ 32703, 32193, 27364, 4160, 63, 64, 0x00000000, 1054 },
{ /* 5: 96 */ 21816, 21646, 18399, 2816, 56, 128, 0x00000000, 703 },
{ /* 6: 128 */ 16367, 15856, 13477, 2176, 15, 32, 0x00008000, 527 },
{ /* 7: 192 */ 10915, 10745, 9133, 1472, 35, 64, 0x00000000, 352 },
{ /* 8: 256 */ 8187, 7676, 6524, 1280, 11, 16, 0x08000800, 265 },
{ /* 9: 384 */ 5459, 5289, 4495, 896, 19, 32, 0x00080000, 176 },
{ /* 10: 512 */ 4094, 3583, 3045, 1024, 6, 8, 0x40404040, 133 },
{ /* 11: 768 */ 2730, 2560, 2176, 512, 10, 16, 0x04000400, 89 },
{ /* 12: 1024 */ 2047, 1536, 1305, 1024, 3, 4, 0x88888888, 66 },
{ /* 13: 1536 */ 1365, 1195, 1015, 512, 5, 8, 0x20202020, 44 },
{ /* 14: 2048 */ 1023, 512, 435, 2048, 1, 2, 0xaaaaaaaa, 34 },
{ /* 15: 3072 */ 682, 512, 435, 2048, 2, 4, 0x44444444, 35 },
{ /* 0: 16 */ 130054, 129546, 110114, 16288, 6, 256, 0x00000000 },
{ /* 1: 24 */ 86927, 86758, 73744, 10904, 399, 512, 0x00000000 },
{ /* 2: 32 */ 65280, 64770, 55054, 8192, 0, 128, 0x00000000 },
{ /* 3: 48 */ 43576, 43406, 36895, 5504, 56, 256, 0x00000000 },
{ /* 4: 64 */ 32703, 32193, 27364, 4160, 63, 64, 0x00000000 },
{ /* 5: 96 */ 21816, 21646, 18399, 2816, 56, 128, 0x00000000 },
{ /* 6: 128 */ 16367, 15856, 13477, 2176, 15, 32, 0x00008000 },
{ /* 7: 192 */ 10915, 10745, 9133, 1472, 35, 64, 0x00000000 },
{ /* 8: 256 */ 8187, 7676, 6524, 1280, 11, 16, 0x08000800 },
{ /* 9: 384 */ 5459, 5289, 4495, 896, 19, 32, 0x00080000 },
{ /* 10: 512 */ 4094, 3583, 3045, 1024, 6, 8, 0x40404040 },
{ /* 11: 768 */ 2730, 2560, 2176, 512, 10, 16, 0x04000400 },
{ /* 12: 1024 */ 2047, 1536, 1305, 1024, 3, 4, 0x88888888 },
{ /* 13: 1536 */ 1365, 1195, 1015, 512, 5, 8, 0x20202020 },
{ /* 14: 2048 */ 1023, 512, 435, 2048, 1, 2, 0xaaaaaaaa },
{ /* 15: 3072 */ 682, 512, 435, 2048, 2, 4, 0x44444444 },
};

// A slab is a chunk of memory used to provide "block"s whose addresses are
Expand Down Expand Up @@ -241,13 +240,6 @@ pattern_unusable(kind_t k)
return kinfo[k].pattern_unusable;
}

// Look up the per-kind multiplier stored in the kinfo table for kind k.
// Callers scale each lane's probe position by this value so that the lanes
// searching a slab of kind k start at spread-out positions.
static uint
spread_factor(kind_t k)
{
    const uint factor = kinfo[k].spread_factor;
    return factor;
}

// The number of active lanes at this point
static uint
active_lane_count(void)
Expand Down Expand Up @@ -849,7 +841,7 @@ block_find(__global sdata_t *sdp)
uint i = 0;
if (aid == 0)
i = AFA(&sp->start, nactive, memory_order_relaxed);
i = ((first(i) + aid) * spread_factor(k) % num_blocks(k)) >> 5;
i = (((first(i) + aid) << 5) % num_blocks(k)) >> 5;

uint n = (num_blocks(k) + 31) >> 5;

Expand Down

0 comments on commit e4b15ce

Please sign in to comment.