From 6d7caf76cc17f44ef1be232a0c9386c7e7f707c9 Mon Sep 17 00:00:00 2001
From: Max
Date: Wed, 12 Apr 2023 16:48:03 +0200
Subject: [PATCH 1/6] aipython3: patch torch-bin for dynamic libdrm location

---
Review note: the sed call rewrites a path embedded in the prebuilt
libdrm_amdgpu.so. The replacement path has the same length as the original
(35 bytes each), presumably so the in-place edit of the shared object does
not shift any offsets; keep the two lengths equal if either path changes.

 modules/aipython3/overlays.nix | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/modules/aipython3/overlays.nix b/modules/aipython3/overlays.nix
index ac3a0a2..02ad03e 100644
--- a/modules/aipython3/overlays.nix
+++ b/modules/aipython3/overlays.nix
@@ -95,6 +95,10 @@ pkgs: {
         url = "https://download.pytorch.org/whl/rocm5.1.1/torch-1.13.1%2Brocm5.1.1-cp310-cp310-linux_x86_64.whl";
         hash = "sha256-qUwAL3L9ODy9hjne8jZQRoG4BxvXXLT7cAy9RbM837A=";
       };
+      postFixup = (old.postFixup or "") + ''
+        ${pkgs.gnused}/bin/sed -i s,/opt/amdgpu/share/libdrm/amdgpu.ids,/tmp/nix-pytorch-rocm___/amdgpu.ids,g $out/${final.python.sitePackages}/torch/lib/libdrm_amdgpu.so
+      '';
+      rocmSupport = true;
     });
     torchvision-bin = prev.torchvision-bin.overrideAttrs (old: {
       src = pkgs.fetchurl {

From 781827ea87efa4b4640e5054b9bebac6e632e127 Mon Sep 17 00:00:00 2001
From: Max
Date: Wed, 12 Apr 2023 16:49:15 +0200
Subject: [PATCH 2/6] invokeai: apply ROCm workarounds

---
Review note: the link is created with `ln -sfn` rather than `ln -s`.
`[ ! -e LINK ]` is also true for a dangling symlink (for example when the
libdrm store path it pointed to has been garbage-collected), and a plain
`ln -s` then fails with "File exists", which aborts a wrapper running under
`sh -e`. With -f the stale link is replaced instead.

 projects/invokeai/package.nix | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/projects/invokeai/package.nix b/projects/invokeai/package.nix
index 0fcd5cc..9fd4ab0 100644
--- a/projects/invokeai/package.nix
+++ b/projects/invokeai/package.nix
@@ -2,6 +2,8 @@
 # misc
 , lib
 , src
+# extra deps
+, libdrm
 }:
 
 let
@@ -70,6 +72,18 @@ aipython3.buildPythonPackage {
       fi
       '
     ''
+  ] ++ lib.optionals (aipython3.torch.rocmSupport or false) [
+    '' --run '
+      if [ ! -e /tmp/nix-pytorch-rocm___/amdgpu.ids ]
+      then
+        mkdir -p /tmp/nix-pytorch-rocm___
+        ln -sfn ${libdrm}/share/libdrm/amdgpu.ids /tmp/nix-pytorch-rocm___/amdgpu.ids
+      fi
+      '
+    ''
+    # See note about consumer GPUs:
+    # https://docs.amd.com/bundle/ROCm-Deep-Learning-Guide-v5.4.3/page/Troubleshooting.html
+    " --set-default HSA_OVERRIDE_GFX_VERSION 10.3.0"
   ];
   patchPhase = ''
     runHook prePatch

From 44209b89f9943ea270a48712112134179e8c62d8 Mon Sep 17 00:00:00 2001
From: Max
Date: Wed, 12 Apr 2023 16:49:54 +0200
Subject: [PATCH 3/6] invokeai/nixos: use PrivateTmp

---
 projects/invokeai/nixos/default.nix | 1 +
 1 file changed, 1 insertion(+)

diff --git a/projects/invokeai/nixos/default.nix b/projects/invokeai/nixos/default.nix
index 70833ad..660786f 100644
--- a/projects/invokeai/nixos/default.nix
+++ b/projects/invokeai/nixos/default.nix
@@ -105,6 +105,7 @@ in
         User = cfg.user;
         Group = cfg.group;
         ExecStart = "${getExe cfg.package} ${escapeShellArgs cliArgs}";
+        PrivateTmp = true;
       };
     };
     systemd.tmpfiles.rules = [

From 1e31b1599c8434405f018fae04c0bec679cabdfe Mon Sep 17 00:00:00 2001
From: Max
Date: Wed, 12 Apr 2023 18:23:12 +0200
Subject: [PATCH 4/6] koboldai: apply ROCm workarounds

---
Review note: the link is created with `ln -sfn` rather than `ln -s` so that
a dangling /tmp/nix-pytorch-rocm___/amdgpu.ids (which passes the
`[ ! -e ... ]` guard) is replaced instead of making `ln` fail with
"File exists".

 projects/koboldai/package.nix | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/projects/koboldai/package.nix b/projects/koboldai/package.nix
index 5d1ed5e..c40b96d 100644
--- a/projects/koboldai/package.nix
+++ b/projects/koboldai/package.nix
@@ -7,6 +7,7 @@
 , runCommand
 , tmpDir ? "/tmp/nix-koboldai"
 , stateDir ? "$HOME/.koboldai/state"
+, libdrm
 }:
 let
   overrides = {
@@ -87,6 +88,17 @@ let
     lupa
     memcached
   ]);
+
+  # See note about consumer GPUs:
+  # https://docs.amd.com/bundle/ROCm-Deep-Learning-Guide-v5.4.3/page/Troubleshooting.html
+  rocmInit = ''
+    if [ ! -e /tmp/nix-pytorch-rocm___/amdgpu.ids ]
+    then
+      mkdir -p /tmp/nix-pytorch-rocm___
+      ln -sfn ${libdrm}/share/libdrm/amdgpu.ids /tmp/nix-pytorch-rocm___/amdgpu.ids
+    fi
+    export HSA_OVERRIDE_GFX_VERSION=''${HSA_OVERRIDE_GFX_VERSION-'10.3.0'}
+  '';
 in
 (writeShellScriptBin "koboldai" ''
   if [ -d "/usr/lib/wsl/lib" ]
@@ -102,6 +114,7 @@ in
   ln -s ${stateDir}/models/ ${tmpDir}/models
   ln -s ${stateDir}/settings/ ${tmpDir}/settings
   ln -s ${stateDir}/userscripts/ ${tmpDir}/userscripts
+  ${lib.optionalString (aipython3.torch.rocmSupport or false) rocmInit}
   ${koboldPython}/bin/python ${patchedSrc}/aiserver.py $@
 '').overrideAttrs
   (_: {

From 776fa40bb231aa4aa44b0e4015b6ef110a0df025 Mon Sep 17 00:00:00 2001
From: Max
Date: Wed, 12 Apr 2023 18:24:13 +0200
Subject: [PATCH 5/6] invokeai/nixos: remove AMD-specific workarounds

---
 projects/invokeai/default.nix   |  1 -
 projects/invokeai/nixos/amd.nix | 12 ------------
 2 files changed, 13 deletions(-)
 delete mode 100644 projects/invokeai/nixos/amd.nix

diff --git a/projects/invokeai/default.nix b/projects/invokeai/default.nix
index 8cb64e2..cce8598 100644
--- a/projects/invokeai/default.nix
+++ b/projects/invokeai/default.nix
@@ -29,7 +29,6 @@
       invokeai-amd = {
         imports = [
           config.flake.nixosModules.invokeai
-          ./nixos/amd.nix
           (packageModule "invokeai-amd")
         ];
       };
diff --git a/projects/invokeai/nixos/amd.nix b/projects/invokeai/nixos/amd.nix
deleted file mode 100644
index d49aa27..0000000
--- a/projects/invokeai/nixos/amd.nix
+++ /dev/null
@@ -1,12 +0,0 @@
-{ pkgs, ... }:
-
-{
-  systemd = {
-    # Allow "unsupported" AMD GPUs
-    services.invokeai.environment.HSA_OVERRIDE_GFX_VERSION = "10.3.0";
-    # HACK: The PyTorch build we use on ROCm wants this to exist
-    tmpfiles.rules = [
-      "L+ /opt/amdgpu - - - - ${pkgs.libdrm}"
-    ];
-  };
-}

From 0bc0ddf92f127a5380f53844546df8cea5306d9d Mon Sep 17 00:00:00 2001
From: Max
Date: Wed, 12 Apr 2023 18:24:29 +0200
Subject: [PATCH 6/6] koboldai/nixos: remove AMD-specific workarounds

---
 projects/koboldai/default.nix   |  1 -
 projects/koboldai/nixos/amd.nix | 12 ------------
 2 files changed, 13 deletions(-)
 delete mode 100644 projects/koboldai/nixos/amd.nix

diff --git a/projects/koboldai/default.nix b/projects/koboldai/default.nix
index eb3f7f6..7832e34 100644
--- a/projects/koboldai/default.nix
+++ b/projects/koboldai/default.nix
@@ -29,7 +29,6 @@
       koboldai-amd = {
         imports = [
           config.flake.nixosModules.koboldai
-          ./nixos/amd.nix
           (packageModule "koboldai-amd")
         ];
       };
diff --git a/projects/koboldai/nixos/amd.nix b/projects/koboldai/nixos/amd.nix
deleted file mode 100644
index 0d3ce7c..0000000
--- a/projects/koboldai/nixos/amd.nix
+++ /dev/null
@@ -1,12 +0,0 @@
-{ pkgs, ... }:
-
-{
-  systemd = {
-    # Allow "unsupported" AMD GPUs
-    services.koboldai.environment.HSA_OVERRIDE_GFX_VERSION = "10.3.0";
-    # HACK: The PyTorch build we use on ROCm wants this to exist
-    tmpfiles.rules = [
-      "L+ /opt/amdgpu - - - - ${pkgs.libdrm}"
-    ];
-  };
-}