mirror of
https://github.com/NixOS/nixpkgs.git
synced 2025-11-10 17:54:53 +01:00
125 lines
3.6 KiB
Nix
125 lines
3.6 KiB
Nix
{
  config,
  lib,
  pkgs,
  ...
}:
let
  # YAML format helper from nixpkgs; its `type` is used for the `settings`
  # option and its `generate` function renders the configuration file.
  settingsFormat = pkgs.formats.yaml { };

  # Shorthand for the evaluated values of this module's options.
  cfg = config.services.llama-swap;

  # `cfg.settings` rendered to a file named "config.yaml"; passed to
  # llama-swap through `--config`.
  configFile = settingsFormat.generate "config.yaml" cfg.settings;
in
|
|
{
|
|
# Option declarations for the llama-swap service.
options.services.llama-swap = {
  # mkEnableOption prefixes its argument with "Whether to enable ", so only a
  # noun phrase is passed here (avoids "Whether to enable enable ...").
  enable = lib.mkEnableOption "the llama-swap service";

  package = lib.mkPackageOption pkgs "llama-swap" { };

  # TCP port handed to llama-swap via `--listen :<port>`.
  port = lib.mkOption {
    default = 8080;
    example = 11343;
    type = lib.types.port;
    description = ''
      Port that llama-swap listens on.
    '';
  };

  # When true, `port` is appended to networking.firewall.allowedTCPPorts.
  openFirewall = lib.mkOption {
    type = lib.types.bool;
    default = false;
    description = ''
      Whether to open the firewall for llama-swap.
      This adds {option}`port` to [](#opt-networking.firewall.allowedTCPPorts).
    '';
  };

  # Free-form settings; serialized to YAML by `settingsFormat`.
  settings = lib.mkOption {
    type = lib.types.submodule { freeformType = settingsFormat.type; };
    description = ''
      llama-swap configuration. Refer to the [llama-swap example configuration](https://github.com/mostlygeek/llama-swap/blob/main/config.example.yaml)
      for details on supported values.
    '';
    # Escaping notes for the indented string below:
    #  - two single quotes followed by `$` produce a literal `$`, so the
    #    rendered example contains a real antiquotation of `llama-server`;
    #  - a backslash before that escape renders as `\$`, which in the rendered
    #    double-quoted string is a literal `$`, leaving the PORT placeholder
    #    untouched by Nix so that llama-swap can substitute it.
    example = lib.literalExpression ''
      let
        llama-cpp = pkgs.llama-cpp.override { rocmSupport = true; };
        llama-server = lib.getExe' llama-cpp "llama-server";
      in
      {
        healthCheckTimeout = 60;
        models = {
          "some-model" = {
            cmd = "''${llama-server} --port \''${PORT} -m /var/lib/llama-cpp/models/some-model.gguf -ngl 0 --no-webui";
            aliases = [
              "the-best"
            ];
          };
          "other-model" = {
            proxy = "http://127.0.0.1:5555";
            cmd = "''${llama-server} --port 5555 -m /var/lib/llama-cpp/models/other-model.gguf -ngl 0 -c 4096 -np 4 --no-webui";
            concurrencyLimit = 4;
          };
        };
      }
    '';
  };
};
|
|
# Everything below is only applied when services.llama-swap.enable is set.
config = lib.mkIf cfg.enable {
  # Expose the listening port through the firewall only on request.
  networking.firewall.allowedTCPPorts = lib.mkIf cfg.openFirewall [ cfg.port ];

  systemd.services.llama-swap = {
    description = "Model swapping for LLaMA C++ Server (or any local OpenAPI compatible server)";
    wantedBy = [ "multi-user.target" ];
    after = [ "network.target" ];

    serviceConfig = {
      Type = "exec";
      # Command line: <llama-swap> --listen :<port> --config <generated yaml>
      ExecStart = lib.concatStringsSep " " [
        (lib.getExe cfg.package)
        "--listen"
        ":${toString cfg.port}"
        "--config"
        "${configFile}"
      ];
      Restart = "on-failure";
      RestartSec = 3;

      # for GPU acceleration
      PrivateDevices = false;

      # hardening: identity and privileges
      DynamicUser = true;
      PrivateUsers = true;
      NoNewPrivileges = true;
      CapabilityBoundingSet = "";
      RestrictSUIDSGID = true;
      LockPersonality = true;

      # hardening: filesystem and /proc visibility
      ProtectSystem = "strict";
      ProtectHome = true;
      PrivateTmp = true;
      PrivateMounts = true;
      ProtectProc = "invisible";
      ProcSubset = "pid";

      # hardening: kernel and host interfaces
      ProtectClock = true;
      ProtectControlGroups = true;
      ProtectHostname = true;
      ProtectKernelLogs = true;
      ProtectKernelModules = true;
      ProtectKernelTunables = true;

      # hardening: networking, namespaces, IPC, scheduling, memory
      RestrictAddressFamilies = [
        "AF_INET"
        "AF_INET6"
        "AF_UNIX"
      ];
      RestrictNamespaces = true;
      RestrictRealtime = true;
      RemoveIPC = true;
      MemoryDenyWriteExecute = true;

      # hardening: system calls (entry order is kept as in the original:
      # the allow-list group first, then the "~"-prefixed group)
      SystemCallArchitectures = "native";
      SystemCallFilter = [
        "@system-service"
        "~@privileged"
      ];
      SystemCallErrorNumber = "EPERM";
    };
  };
};
|
|
|
|
# Maintainers of this module, referenced explicitly from lib.maintainers.
meta.maintainers = [
  lib.maintainers.jk
  lib.maintainers.podium868909
];
|
|
}
|