feat: improve GPU support when multiple GPUs (#2238)
If there are multiple GPUs, use the first one that is of a
known type instead of the first GPU. If no GPUs are of a
known type, fall back to the first GPU as before.

Add another vendor string that is accepted as an NVIDIA
GPU during GPU detection, based on what was seen on Linux
with an NVIDIA 4070 Ti Super.

Signed-off-by: Michael Dawson <[email protected]>
mhdawson authored Dec 11, 2024
1 parent e3a34d5 commit ce6844b
Showing 4 changed files with 121 additions and 3 deletions.
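
For orientation before the diffs: the selection rule described in the commit message reduces to a single Array.find() call with a nullish-coalescing (??) fallback. Below is a minimal standalone sketch, assuming a simplified IGPUInfo shape and illustrative enum values (the real types live in the extension; pickGPU is a name used here for illustration only — in the diff the logic is inlined in LlamaCppPython.perform):

enum GPUVendor {
  NVIDIA = 'nvidia',
  INTEL = 'intel',
  APPLE = 'apple',
  UNKNOWN = 'unknown',
}

interface IGPUInfo {
  vendor: GPUVendor;
  model: string;
  vram: number;
}

// Prefer the first GPU whose vendor was recognized; if none was,
// fall back to the first GPU in the list (the pre-change behavior).
function pickGPU(gpus: IGPUInfo[]): IGPUInfo {
  if (gpus.length === 0) throw new Error('no gpu was found.');
  return gpus.find(({ vendor }) => vendor !== GPUVendor.UNKNOWN) ?? gpus[0];
}

Since find() returns undefined when nothing matches, the ?? gpus[0] fallback restores the old first-GPU behavior in a single expression.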
22 changes: 22 additions & 0 deletions packages/backend/src/managers/GPUManager.spec.ts
@@ -105,3 +105,25 @@ test('NVIDIA controller should return intel vendor', async () => {
},
]);
});

test('NVIDIA controller can have vendor "NVIDIA Corporation"', async () => {
vi.mocked(graphics).mockResolvedValue({
controllers: [
{
vendor: 'NVIDIA Corporation',
model: 'NVIDIA GeForce GTX 1060 6GB',
vram: 6144,
} as unknown as Systeminformation.GraphicsControllerData,
],
displays: [],
});

const manager = new GPUManager(webviewMock);
expect(await manager.collectGPUs()).toStrictEqual([
{
vendor: GPUVendor.NVIDIA,
model: 'NVIDIA GeForce GTX 1060 6GB',
vram: 6144,
},
]);
});
1 change: 1 addition & 0 deletions packages/backend/src/managers/GPUManager.ts
@@ -53,6 +53,7 @@ export class GPUManager extends Publisher<IGPUInfo[]> implements Disposable {
case 'Intel Corporation':
return GPUVendor.INTEL;
case 'NVIDIA':
case 'NVIDIA Corporation':
return GPUVendor.NVIDIA;
case 'Apple':
return GPUVendor.APPLE;
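
For reference, the switch extended by this one-line hunk maps raw vendor strings reported by systeminformation to the GPUVendor enum. A sketch of how the full mapping plausibly reads after the change (the default branch is an assumption, inferred from the tests expecting GPUVendor.UNKNOWN for unrecognized vendors):

switch (vendor) {
  case 'Intel Corporation':
    return GPUVendor.INTEL;
  case 'NVIDIA':
  // Newly accepted: the vendor string observed on Linux with an NVIDIA 4070 Ti Super.
  case 'NVIDIA Corporation':
    return GPUVendor.NVIDIA;
  case 'Apple':
    return GPUVendor.APPLE;
  default:
    return GPUVendor.UNKNOWN; // assumed fallback for unrecognized vendors
}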
94 changes: 94 additions & 0 deletions packages/backend/src/workers/provider/LlamaCppPython.spec.ts
@@ -313,6 +313,100 @@ describe('perform', () => {
expect(server.labels['gpu']).toBe('nvidia');
});

test('gpu experimental should collect GPU data and find first supported gpu - entry 1 supported', async () => {
vi.mocked(configurationRegistry.getExtensionConfiguration).mockReturnValue({
experimentalGPU: true,
modelsPath: '',
apiPort: 10434,
experimentalTuning: false,
modelUploadDisabled: false,
showGPUPromotion: false,
});

vi.mocked(gpuManager.collectGPUs).mockResolvedValue([
{
vram: 1024,
model: 'dummy-model',
vendor: GPUVendor.UNKNOWN,
},
{
vram: 1024,
model: 'nvidia',
vendor: GPUVendor.NVIDIA,
},
]);

const provider = new LlamaCppPython(taskRegistry, podmanConnection, gpuManager, configurationRegistry);
const server = await provider.perform({
port: 8000,
image: undefined,
labels: {},
modelsInfo: [DummyModel],
connection: undefined,
});

expect(containerEngine.createContainer).toHaveBeenCalledWith(
DummyImageInfo.engineId,
expect.objectContaining({
Cmd: [
'-c',
'/usr/bin/ln -sfn /usr/lib/wsl/lib/* /usr/lib64/ && PATH="${PATH}:/usr/lib/wsl/lib/" && chmod 755 ./run.sh && ./run.sh',
],
}),
);
expect(gpuManager.collectGPUs).toHaveBeenCalled();
expect(getImageInfo).toHaveBeenCalledWith(expect.anything(), llamacpp.cuda, expect.any(Function));
expect('gpu' in server.labels).toBeTruthy();
expect(server.labels['gpu']).toBe('nvidia');
});

test('gpu experimental should collect GPU data and find first supported gpu - entry 0 supported', async () => {
vi.mocked(configurationRegistry.getExtensionConfiguration).mockReturnValue({
experimentalGPU: true,
modelsPath: '',
apiPort: 10434,
experimentalTuning: false,
modelUploadDisabled: false,
showGPUPromotion: false,
});

vi.mocked(gpuManager.collectGPUs).mockResolvedValue([
{
vram: 1024,
model: 'nvidia',
vendor: GPUVendor.NVIDIA,
},
{
vram: 1024,
model: 'dummy-model',
vendor: GPUVendor.UNKNOWN,
},
]);

const provider = new LlamaCppPython(taskRegistry, podmanConnection, gpuManager, configurationRegistry);
const server = await provider.perform({
port: 8000,
image: undefined,
labels: {},
modelsInfo: [DummyModel],
connection: undefined,
});

expect(containerEngine.createContainer).toHaveBeenCalledWith(
DummyImageInfo.engineId,
expect.objectContaining({
Cmd: [
'-c',
'/usr/bin/ln -sfn /usr/lib/wsl/lib/* /usr/lib64/ && PATH="${PATH}:/usr/lib/wsl/lib/" && chmod 755 ./run.sh && ./run.sh',
],
}),
);
expect(gpuManager.collectGPUs).toHaveBeenCalled();
expect(getImageInfo).toHaveBeenCalledWith(expect.anything(), llamacpp.cuda, expect.any(Function));
expect('gpu' in server.labels).toBeTruthy();
expect(server.labels['gpu']).toBe('nvidia');
});

test('unknown gpu on unsupported vmtype should not provide gpu labels', async () => {
vi.mocked(configurationRegistry.getExtensionConfiguration).mockReturnValue({
experimentalGPU: true,
7 changes: 4 additions & 3 deletions packages/backend/src/workers/provider/LlamaCppPython.ts
@@ -197,9 +197,10 @@ export class LlamaCppPython extends InferenceProvider {
  if (this.configurationRegistry.getExtensionConfiguration().experimentalGPU) {
    const gpus: IGPUInfo[] = await this.gpuManager.collectGPUs();
    if (gpus.length === 0) throw new Error('no gpu was found.');
-   if (gpus.length > 1)
-     console.warn(`found ${gpus.length} gpus: using multiple GPUs is not supported. Using ${gpus[0].model}.`);
-   gpu = gpus[0];
+
+   // Look for a GPU that is of a known type, use the first one found.
+   // Fall back to the first one if no GPUs are of known type.
+   gpu = gpus.find(({ vendor }) => vendor !== GPUVendor.UNKNOWN) ?? gpus[0];
  }

  let connection: ContainerProviderConnection | undefined = undefined;
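
The two multi-GPU specs above each include one supported GPU. For completeness, here is what the new line does when every vendor is unrecognized, reusing the simplified types from the first sketch (values are made up):

const gpus: IGPUInfo[] = [
  { vram: 1024, model: 'dummy-a', vendor: GPUVendor.UNKNOWN },
  { vram: 2048, model: 'dummy-b', vendor: GPUVendor.UNKNOWN },
];

// find() matches nothing and returns undefined, so ?? selects
// gpus[0] — exactly the pre-change "first GPU" behavior.
const gpu = gpus.find(({ vendor }) => vendor !== GPUVendor.UNKNOWN) ?? gpus[0];
console.log(gpu.model); // 'dummy-a'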
