Merge pull request #24 from nixified-ai/rocm-libdrm-patch-torch-bin

PyTorch: patch libdrm path for ROCm
This commit is contained in:
Matthew Croughan 2023-04-12 21:34:36 +01:00 committed by GitHub
commit 422bf55033
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 32 additions and 26 deletions

View file

@ -95,6 +95,10 @@ pkgs: {
url = "https://download.pytorch.org/whl/rocm5.1.1/torch-1.13.1%2Brocm5.1.1-cp310-cp310-linux_x86_64.whl";
hash = "sha256-qUwAL3L9ODy9hjne8jZQRoG4BxvXXLT7cAy9RbM837A=";
};
postFixup = (old.postFixup or "") + ''
${pkgs.gnused}/bin/sed -i s,/opt/amdgpu/share/libdrm/amdgpu.ids,/tmp/nix-pytorch-rocm___/amdgpu.ids,g $out/${final.python.sitePackages}/torch/lib/libdrm_amdgpu.so
'';
rocmSupport = true;
});
torchvision-bin = prev.torchvision-bin.overrideAttrs (old: {
src = pkgs.fetchurl {

View file

@ -29,7 +29,6 @@
invokeai-amd = {
imports = [
config.flake.nixosModules.invokeai
./nixos/amd.nix
(packageModule "invokeai-amd")
];
};

View file

@ -1,12 +0,0 @@
# NixOS module that makes the invokeai service usable on consumer
# ("unsupported") AMD GPUs under ROCm. Deleted by this commit in favour of
# patching the libdrm path directly into the PyTorch wheel.
# NOTE(review): an identical module exists for koboldai — keep them in sync.
{ pkgs, ... }:
{
systemd = {
# Allow "unsupported" AMD GPUs
# Pretend the GPU is gfx10.3.0 so ROCm accepts consumer cards.
services.invokeai.environment.HSA_OVERRIDE_GFX_VERSION = "10.3.0";
# HACK: The PyTorch build we use on ROCm wants this to exist
# ("L+" = force-create a symlink, replacing any existing path):
# /opt/amdgpu -> the Nix-store libdrm, so torch can read amdgpu.ids.
tmpfiles.rules = [
"L+ /opt/amdgpu - - - - ${pkgs.libdrm}"
];
};
}

View file

@ -105,6 +105,7 @@ in
User = cfg.user;
Group = cfg.group;
ExecStart = "${getExe cfg.package} ${escapeShellArgs cliArgs}";
PrivateTmp = true;
};
};
systemd.tmpfiles.rules = [

View file

@ -2,6 +2,8 @@
# misc
, lib
, src
# extra deps
, libdrm
}:
let
@ -70,6 +72,18 @@ aipython3.buildPythonPackage {
fi
'
''
] ++ lib.optionals (aipython3.torch.rocmSupport or false) [
'' --run '
if [ ! -e /tmp/nix-pytorch-rocm___/amdgpu.ids ]
then
mkdir -p /tmp/nix-pytorch-rocm___
ln -s ${libdrm}/share/libdrm/amdgpu.ids /tmp/nix-pytorch-rocm___/amdgpu.ids
fi
'
''
# See note about consumer GPUs:
# https://docs.amd.com/bundle/ROCm-Deep-Learning-Guide-v5.4.3/page/Troubleshooting.html
" --set-default HSA_OVERRIDE_GFX_VERSION 10.3.0"
];
patchPhase = ''
runHook prePatch

View file

@ -29,7 +29,6 @@
koboldai-amd = {
imports = [
config.flake.nixosModules.koboldai
./nixos/amd.nix
(packageModule "koboldai-amd")
];
};

View file

@ -1,12 +0,0 @@
# NixOS module that makes the koboldai service usable on consumer
# ("unsupported") AMD GPUs under ROCm. Deleted by this commit in favour of
# patching the libdrm path directly into the PyTorch wheel.
# NOTE(review): an identical module exists for invokeai — keep them in sync.
{ pkgs, ... }:
{
systemd = {
# Allow "unsupported" AMD GPUs
# Pretend the GPU is gfx10.3.0 so ROCm accepts consumer cards.
services.koboldai.environment.HSA_OVERRIDE_GFX_VERSION = "10.3.0";
# HACK: The PyTorch build we use on ROCm wants this to exist
# ("L+" = force-create a symlink, replacing any existing path):
# /opt/amdgpu -> the Nix-store libdrm, so torch can read amdgpu.ids.
tmpfiles.rules = [
"L+ /opt/amdgpu - - - - ${pkgs.libdrm}"
];
};
}

View file

@ -7,6 +7,7 @@
, runCommand
, tmpDir ? "/tmp/nix-koboldai"
, stateDir ? "$HOME/.koboldai/state"
, libdrm
}:
let
overrides = {
@ -87,6 +88,17 @@ let
lupa
memcached
]);
# See note about consumer GPUs:
# https://docs.amd.com/bundle/ROCm-Deep-Learning-Guide-v5.4.3/page/Troubleshooting.html
rocmInit = ''
if [ ! -e /tmp/nix-pytorch-rocm___/amdgpu.ids ]
then
mkdir -p /tmp/nix-pytorch-rocm___
ln -s ${libdrm}/share/libdrm/amdgpu.ids /tmp/nix-pytorch-rocm___/amdgpu.ids
fi
export HSA_OVERRIDE_GFX_VERSION=''${HSA_OVERRIDE_GFX_VERSION-'10.3.0'}
'';
in
(writeShellScriptBin "koboldai" ''
if [ -d "/usr/lib/wsl/lib" ]
@ -102,6 +114,7 @@ in
ln -s ${stateDir}/models/ ${tmpDir}/models
ln -s ${stateDir}/settings/ ${tmpDir}/settings
ln -s ${stateDir}/userscripts/ ${tmpDir}/userscripts
${lib.optionalString (aipython3.torch.rocmSupport or false) rocmInit}
${koboldPython}/bin/python ${patchedSrc}/aiserver.py $@
'').overrideAttrs
(_: {