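# Development Makefile: dependency setup, native builds (CPU, debug, CUDA,
# OpenBLAS), tests, Docker images, and example model downloads.

# Install Python dependencies via Poetry and sync the git submodules.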
update:
	poetry install
	git submodule update --init --recursive
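
# Pull the latest upstream llama.cpp into the vendored submodule.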
update.vendor:
	cd vendor/llama.cpp && git pull origin master
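
# Upgrade pip and install the package with all optional extras.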
deps:
	python3 -m pip install --upgrade pip
	python3 -m pip install -e ".[all]"
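
# Standard editable install; compiles the vendored llama.cpp extension.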
build:
	python3 -m pip install --verbose -e .
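
# Debug build: verbose CMake output, debug symbols (-ggdb -O0), binaries left unstripped.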
build.debug:
	python3 -m pip install \
		--verbose \
		--config-settings=cmake.verbose=true \
		--config-settings=logging.level=INFO \
		--config-settings=install.strip=false \
		--config-settings=cmake.args="-DCMAKE_BUILD_TYPE=Debug;-DCMAKE_C_FLAGS='-ggdb -O0';-DCMAKE_CXX_FLAGS='-ggdb -O0'" \
		--editable .
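
# Debug build with AddressSanitizer enabled on top of -ggdb -O0.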
build.debug.extra:
	python3 -m pip install \
		--verbose \
		--config-settings=cmake.verbose=true \
		--config-settings=logging.level=INFO \
		--config-settings=install.strip=false \
		--config-settings=cmake.args="-DCMAKE_BUILD_TYPE=Debug;-DCMAKE_C_FLAGS='-fsanitize=address -ggdb -O0';-DCMAKE_CXX_FLAGS='-fsanitize=address -ggdb -O0'" \
		--editable .
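
# Build with the CUDA backend (GGML_CUDA) enabled.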
build.cuda:
	CMAKE_ARGS="-DGGML_CUDA=on" python3 -m pip install --verbose -e .
build.openblas:
	CMAKE_ARGS="-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS" python3 -m pip install --verbose -e .
test:
	python3 -m pytest --full-trace -v
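
# Run the test suite inside the nekko-api image (installs extras in the container first).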
test-docker:
	docker run --rm nekko-api:latest /bin/sh -c "python3 -m pip install -e \".[all]\" && python3 -m pytest --full-trace -v"
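
# Build the nekko-api Docker image.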
docker:
	docker build -t nekko-api:latest -f docker/simple/Dockerfile .
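
# Start the server with a model supplied via the MODEL variable, e.g.:
#   make run-server MODEL=models/SmolLM2-135M-Instruct-Q6_K.gguf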
run-server:
	python3 -m llama_cpp.server --model ${MODEL}
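
# Start the server using the bundled example settings file.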
run:
	python3 -m llama_cpp.server --config_file=./examples/settings.json
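
# Run the image with the example models and settings mounted into /app.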
run-example-docker: example-models
	docker run \
		-v ./models:/app/models \
		-v ./examples/settings.json:/app/settings.json \
		--cap-add SYS_RESOURCE \
		-p 8000:8000 \
		-e CONFIG_FILE=settings.json \
		-it \
		nekko-api
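
# Bring up the web demo stack via docker compose.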
run-demo: example-models
	docker compose -f docker/web/docker-compose.yml up
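
# Example GGUF models, fetched from Hugging Face; '| models' makes the
# download directory an order-only prerequisite so it exists first.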
example-models: models/SmolLM2-135M-Instruct-Q6_K.gguf models/Llama-3.2-1B-Instruct-Q5_K_S.gguf models/OLMo-7B-Instruct-hf-0724-Q4_K.gguf

models/SmolLM2-135M-Instruct-Q6_K.gguf: | models
	curl -L https://huggingface.co/lmstudio-community/SmolLM2-135M-Instruct-GGUF/resolve/main/SmolLM2-135M-Instruct-Q6_K.gguf -o $@

models/Llama-3.2-1B-Instruct-Q5_K_S.gguf: | models
	curl -L https://huggingface.co/bartowski/Llama-3.2-1B-Instruct-GGUF/resolve/main/Llama-3.2-1B-Instruct-Q5_K_S.gguf -o $@

models/OLMo-7B-Instruct-hf-0724-Q4_K.gguf: | models
	curl -L https://huggingface.co/aifoundry-org/OLMo-7B-0724-Instruct-hf-Quantized/resolve/main/OLMo-7B-Instruct-hf-0724-Q4_K.gguf -o $@
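
# Create the download directory for example models.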
models:
	mkdir models
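
# Remove build artifacts, compiled libraries, and downloaded models;
# the leading '-' lets make continue past errors.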
clean:
	- cd vendor/llama.cpp && make clean
	- cd vendor/llama.cpp && rm libllama.so
	- rm -rf _skbuild
	- rm llama_cpp/lib/*.so
	- rm llama_cpp/lib/*.dylib
	- rm llama_cpp/lib/*.metal
	- rm llama_cpp/lib/*.dll
	- rm llama_cpp/lib/*.lib
	- rm -rf models
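
# Declare targets that do not produce files of the same name.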
.PHONY: \
	update \
	update.vendor \
	deps \
	build \
	build.debug \
	build.debug.extra \
	build.cuda \
	build.openblas \
	test \
	test-docker \
	docker \
	run-server \
	run \
	run-example-docker \
	run-demo \
	example-models \
	clean