nixpkgs/nixos/modules/services/networking/llama-swap.nix
06kellyjac 110edff547 llama-swap: init module
Co-authored-by: podium868909 <89096245@proton.me>
2025-09-03 22:15:38 +02:00

125 lines
3.6 KiB
Nix

{
config,
lib,
pkgs,
...
}:
let
  # YAML format helper; its `type` backs the freeform `settings` option and
  # its `generate` function renders the final configuration file.
  settingsFormat = pkgs.formats.yaml { };

  # Shorthand for the option values declared by this module.
  cfg = config.services.llama-swap;

  # `cfg.settings` serialized to a file named "config.yaml"; passed to the
  # daemon through `--config` in ExecStart.
  configFile = settingsFormat.generate "config.yaml" cfg.settings;
in
{
options.services.llama-swap = {
  # `mkEnableOption` itself prepends "Whether to enable ", so only the noun
  # phrase is passed here (avoids the doubled "enable enable" in the docs).
  enable = lib.mkEnableOption "the llama-swap service";

  package = lib.mkPackageOption pkgs "llama-swap" { };

  # Listening port; handed to `--listen` in ExecStart and, when
  # `openFirewall` is set, added to the firewall's allowed TCP ports.
  port = lib.mkOption {
    type = lib.types.port;
    default = 8080;
    example = 11343;
    description = ''
      Port that llama-swap listens on.
    '';
  };

  openFirewall = lib.mkOption {
    type = lib.types.bool;
    default = false;
    description = ''
      Whether to open the firewall for llama-swap.
      This adds {option}`port` to [](#opt-networking.firewall.allowedTCPPorts).
    '';
  };

  # Freeform settings: any value accepted by the YAML format is allowed and
  # the whole attribute set is rendered into the generated config.yaml.
  settings = lib.mkOption {
    type = lib.types.submodule { freeformType = settingsFormat.type; };
    description = ''
      llama-swap configuration. Refer to the [llama-swap example configuration](https://github.com/mostlygeek/llama-swap/blob/main/config.example.yaml)
      for details on supported values.
    '';
    # Escaping inside the indented string below:
    #   ''${llama-server}  renders as  ${llama-server}  -> a copied example
    #                      interpolates the `llama-server` let-binding;
    #   \''${PORT}         renders as  \${PORT}         -> the copied Nix string
    #                      keeps a literal ${PORT} for llama-swap's own macro.
    example = lib.literalExpression ''
      let
        llama-cpp = pkgs.llama-cpp.override { rocmSupport = true; };
        llama-server = lib.getExe' llama-cpp "llama-server";
      in
      {
        healthCheckTimeout = 60;
        models = {
          "some-model" = {
            cmd = "''${llama-server} --port \''${PORT} -m /var/lib/llama-cpp/models/some-model.gguf -ngl 0 --no-webui";
            aliases = [
              "the-best"
            ];
          };
          "other-model" = {
            proxy = "http://127.0.0.1:5555";
            cmd = "''${llama-server} --port 5555 -m /var/lib/llama-cpp/models/other-model.gguf -ngl 0 -c 4096 -np 4 --no-webui";
            concurrencyLimit = 4;
          };
        };
      };
    '';
  };
};
config = lib.mkIf cfg.enable {
  # The listening port is added to the firewall only when `openFirewall` is set.
  networking.firewall.allowedTCPPorts = lib.mkIf cfg.openFirewall [ cfg.port ];

  systemd.services.llama-swap = {
    description = "Model swapping for LLaMA C++ Server (or any local OpenAPI compatible server)";
    wantedBy = [ "multi-user.target" ];
    after = [ "network.target" ];

    serviceConfig = {
      Type = "exec";
      # Joined with single spaces: <exe> --listen :<port> --config <generated yaml>
      ExecStart = lib.concatStringsSep " " [
        (lib.getExe cfg.package)
        "--listen"
        ":${toString cfg.port}"
        "--config"
        "${configFile}"
      ];
      Restart = "on-failure";
      RestartSec = 3;

      # Device nodes stay reachable for GPU acceleration.
      PrivateDevices = false;

      # --- hardening ---

      # Identity and privileges
      DynamicUser = true;
      PrivateUsers = true;
      NoNewPrivileges = true;
      CapabilityBoundingSet = "";
      RestrictSUIDSGID = true;
      LockPersonality = true;

      # Filesystem, mounts and IPC
      ProtectSystem = "strict";
      ProtectHome = true;
      PrivateTmp = true;
      PrivateMounts = true;
      RemoveIPC = true;

      # Kernel interfaces
      ProtectClock = true;
      ProtectControlGroups = true;
      ProtectHostname = true;
      ProtectKernelLogs = true;
      ProtectKernelModules = true;
      ProtectKernelTunables = true;
      ProtectProc = "invisible";
      ProcSubset = "pid";

      # Process restrictions
      MemoryDenyWriteExecute = true;
      RestrictNamespaces = true;
      RestrictRealtime = true;

      # Networking
      RestrictAddressFamilies = [
        "AF_INET"
        "AF_INET6"
        "AF_UNIX"
      ];

      # System calls
      SystemCallArchitectures = "native";
      SystemCallErrorNumber = "EPERM";
      SystemCallFilter = [
        "@system-service"
        "~@privileged"
      ];
    };
  };
};
# Maintainers of this module, looked up explicitly in `lib.maintainers`.
meta.maintainers = [
  lib.maintainers.jk
  lib.maintainers.podium868909
];
}