mirror of
https://github.com/NixOS/nixpkgs.git
synced 2025-11-10 17:54:53 +01:00
259 lines
9.2 KiB
Nix
259 lines
9.2 KiB
Nix
{ pkgs, lib, ... }:
|
|
|
|
let
|
|
# Wrap `attrs.src` in a trivial derivation named "<pname>-<version>" whose
# build script just symlinks `$src` to `$out`. All of `attrs` (including any
# `meta`) is handed to `runCommand` unchanged.
wrapSrc =
  attrs:
  let
    name = "${attrs.pname}-${attrs.version}";
  in
  pkgs.runCommand name attrs "ln -s $src $out";
|
|
|
|
# SmolLM2-135M-Instruct (Q4_K_M GGUF) model file used by the extended tests.
smollm2-135m =
  let
    pname = "smollm2-135m";
    # revision identifier substituted into the `resolve/<version>/` part of the URL
    version = "9e6855bc4be717fca1ef21360a1db4b29d5c559a";
  in
  wrapSrc {
    inherit pname version;

    src = pkgs.fetchurl {
      url = "https://huggingface.co/unsloth/SmolLM2-135M-Instruct-GGUF/resolve/${version}/SmolLM2-135M-Instruct-Q4_K_M.gguf";
      hash = "sha256-7V+jDEh7KC7BVsKQYvEiLlwgh1qUSsmCidvSQulH90c=";
    };

    meta.license = [
      lib.licenses.asl20 # actual license of the model
      lib.licenses.unfree # to force an opt-in - do not remove
    ];
  };
|
|
|
|
# Use the configured `allowUnfreePredicate` when the nixpkgs config has one;
# otherwise fall back to a predicate that denies every package.
allowUnfreePredicate = pkgs.config.allowUnfreePredicate or (_: false);

# SmolLM2 may be used when unfree packages are allowed globally, or when the
# predicate explicitly allows this particular package.
useSmollm2-135m = pkgs.config.allowUnfree || allowUnfreePredicate smollm2-135m;
|
|
in
|
|
{
|
|
# Name of this test and the maintainers responsible for it.
name = "llama-swap";

meta.maintainers = [
  lib.maintainers.jk
  lib.maintainers.podium868909
];
|
|
|
|
# Single test VM running the llama-swap service.
nodes.machine =
  { pkgs, ... }:
  {
    # running models can be memory intensive but
    # default `virtualisation.memorySize` is fine

    services.llama-swap = {
      enable = true;

      # Without the SmolLM2 opt-in the settings stay empty (basic tests only);
      # with it, the models and groups below back the extended tests.
      settings = lib.optionalAttrs useSmollm2-135m (
        let
          llama-server = lib.getExe' pkgs.llama-cpp "llama-server";

          # Flags shared by every model command.
          # temperature and top-k important for SmolLM2 performance/accuracy
          modelFlags = "-m ${smollm2-135m} --no-webui --temp 0.2 --top-k 9";
        in
        {
          hooks.on_startup.preload = [ "smollm2" ];

          models = {
            smollm2 = {
              # the test script waits for this model to drop out of `/running`
              ttl = 10;
              cmd = "${llama-server} --port \${PORT} ${modelFlags}";
            };

            smollm2-group-1.cmd = "${llama-server} --port \${PORT} ${modelFlags} -c 1024";

            # this model uses a fixed port together with an explicit `proxy` URL
            smollm2-group-2 = {
              proxy = "http://127.0.0.1:5802";
              cmd = "${llama-server} --port 5802 ${modelFlags} -c 1024";
            };
          };

          groups = {
            standalone = {
              swap = true;
              exclusive = true;
              members = [ "smollm2" ];
            };

            group = {
              swap = false;
              exclusive = true;
              members = [
                "smollm2-group-1"
                "smollm2-group-2"
              ];
            };
          };
        }
      );
    };
  };
|
|
|
|
testScript =
|
|
{ nodes, ... }:
|
|
''
|
|
# core tests
|
|
import json
|
|
|
|
def get_json(route):
  """Run curl on the test machine against `route` on localhost:8080 and decode its output as JSON."""
  curl_flags = " ".join([
    '-v',
    '-s',
    '--fail',
    '-H "Content-Type: application/json"',
  ])
  # `succeed` raises if curl exits non-zero (e.g. `--fail` on an HTTP error)
  stdout = machine.succeed(f"curl {curl_flags} http://localhost:8080{route}")
  return json.loads(stdout)
|
|
|
|
def post_json(route, data):
  """POST `data` (serialised as JSON) to `route` on localhost:8080 via curl on the
  test machine and decode the command's output as JSON.

  The request carries a JSON content type and a `Bearer no-key` authorization header.
  """
  # local import keeps this helper self-contained within the test script
  import shlex

  args = [
    '-v',
    '-s',
    '--fail',
    '-H "Content-Type: application/json"',
    '-H "Authorization: Bearer no-key"',
    # shell-quote the payload so apostrophes (or other shell metacharacters)
    # inside `data` cannot break the command line
    '-d ' + shlex.quote(json.dumps(data)),
  ]
  return json.loads(machine.succeed('curl {args} http://localhost:8080{route}'.format(args=" ".join(args), route=route)))
|
|
|
|
# Wait for the llama-swap unit and its listening port before probing it.
machine.wait_for_unit('llama-swap')
machine.wait_for_open_port(8080)

with subtest('check is serving ui'):
  # fixed URL: the scheme separator was missing a slash (`http:/localhost`)
  machine.succeed('curl --fail http://localhost:8080/ui/')

with subtest('check is healthy'):
  # retried until the health endpoint answers with "OK"
  machine.wait_until_succeeds('curl --silent --fail http://localhost:8080/health | grep "OK"')
|
|
|
|
''
|
|
+ lib.optionalString useSmollm2-135m ''
|
|
# extended tests using SmolLM2
|
|
with subtest('check `/running` for preloaded smollm2'):
  # smollm2 is listed in `hooks.on_startup.preload`; wait until it shows up
  machine.wait_until_succeeds('curl --silent --fail http://localhost:8080/running | grep "smollm2"')
  running_response = get_json('/running')
  running_models = running_response['running']
  # exactly one model may be running, and it must be smollm2 in the ready state
  assert len(running_models) == 1
  running_model = running_models[0]
  assert running_model['model'] == 'smollm2'
  assert running_model['state'] == 'ready'
|
|
|
|
with subtest('runs smollm2'):
  # Send one chat completion to smollm2, validate the answer, then watch the
  # model drop out of `/running` again.
  response = None
  with subtest('send request to smollm2'):
    data = {
      'model': 'smollm2',
      'messages': [
        {'role': 'user', 'content': 'Say hello'},
      ],
    }
    response = post_json('/v1/chat/completions', data)

  with subtest('response is from smollm2'):
    assert response['model'] == 'smollm2'

  with subtest('response contains at least one item in "choices"'):
    assert len(response['choices']) >= 1

  assistant_choices = None
  with subtest('response contains at least one "assistant" message'):
    assistant_choices = [
      choice for choice in response['choices']
      if choice['message']['role'] == 'assistant'
    ]
    assert len(assistant_choices) >= 1

  with subtest('first message (lowercase) starts with "hello"'):
    first_reply = assistant_choices[0]['message']['content']
    assert first_reply.lower().startswith('hello')

  with subtest('check `/running` for just smollm2'):
    running_response = get_json('/running')
    assert len(running_response['running']) == 1
    running_model = running_response['running'][0]
    assert running_model['model'] == 'smollm2'
    assert running_model['state'] == 'ready'

  with subtest('check `/running` for smollm2 to timeout'):
    # still listed right now ...
    machine.succeed('curl --silent --fail http://localhost:8080/running | grep "smollm2"')
    # ... but expected to vanish; the model is configured with `ttl = 10`,
    # hence the slightly larger timeout here
    machine.wait_until_succeeds('curl --silent --fail http://localhost:8080/running | grep -v "smollm2"', timeout=11)
    running_response = get_json('/running')
    assert len(running_response['running']) == 0
|
|
|
|
with subtest('runs smollm2-group-1 and smollm2-group-2'):
  # Query both members of the `group` group one after the other. That group is
  # configured with `swap = false`, and the final check expects both models to
  # be running side by side.
  response_1 = None
  with subtest('send request to smollm2-group-1'):
    data = {
      'model': 'smollm2-group-1',
      'messages': [
        {
          'role': 'user',
          'content': 'Say hello'
        }
      ]
    }
    response_1 = post_json('/v1/chat/completions', data)

  with subtest('response 1 is from smollm2-group-1'):
    assert response_1['model'] == 'smollm2-group-1'

  with subtest('response 1 contains at least one item in "choices"'):
    # was checking the stale `response` from the earlier smollm2 subtest
    assert len(response_1['choices']) >= 1

  assistant_choices_1 = None
  with subtest('response 1 contains at least one "assistant" message'):
    assistant_choices_1 = [c for c in response_1['choices'] if c['message']['role'] == 'assistant']
    assert len(assistant_choices_1) >= 1

  with subtest('first message (lowercase) in response 1 starts with "hello"'):
    assert assistant_choices_1[0]['message']['content'].lower()[:5] == 'hello'

  with subtest('check `/running` for just smollm2-group-1'):
    running_response = get_json('/running')
    assert len(running_response['running']) == 1
    running_model = running_response['running'][0]
    assert running_model['model'] == 'smollm2-group-1'
    assert running_model['state'] == 'ready'

  response_2 = None
  with subtest('send request to smollm2-group-2'):
    data = {
      'model': 'smollm2-group-2',
      'messages': [
        {
          'role': 'user',
          'content': 'Say hello'
        }
      ]
    }
    response_2 = post_json('/v1/chat/completions', data)

  with subtest('response 2 is from smollm2-group-2'):
    assert response_2['model'] == 'smollm2-group-2'

  with subtest('response 2 contains at least one item in "choices"'):
    # was checking the stale `response` from the earlier smollm2 subtest
    assert len(response_2['choices']) >= 1

  assistant_choices_2 = None
  with subtest('response 2 contains at least one "assistant" message'):
    assistant_choices_2 = [c for c in response_2['choices'] if c['message']['role'] == 'assistant']
    assert len(assistant_choices_2) >= 1

  # label fixed: this subtest inspects response 2, not response 1
  with subtest('first message (lowercase) in response 2 starts with "hello"'):
    assert assistant_choices_2[0]['message']['content'].lower()[:5] == 'hello'

  with subtest('check `/running` for both smollm2-group-1 and smollm2-group-2'):
    running_response = get_json('/running')['running']
    assert len(running_response) == 2
    # each group member must appear exactly once, in the ready state
    assert len([
      rm for rm in running_response
      if rm['state'] == 'ready' and rm['model'] == 'smollm2-group-1'
    ]) == 1
    assert len([
      rm for rm in running_response
      if rm['state'] == 'ready' and rm['model'] == 'smollm2-group-2'
    ]) == 1
|
|
'';
|
|
}
|