Mirror of https://github.com/iperov/DeepFaceLive (synced 2025-07-07 05:22:08 -07:00)

Commit 6d504d5969 (parent 071bf80681)
added DirectX12-compatible cards support through onnxruntime-directml

5 changed files with 163 additions and 171 deletions
build/windows/WindowsBuilder.py

@@ -12,7 +12,7 @@ from typing import List

 class WindowsFolderBuilder:
     """
-    Builds standalone python folder for Windows with the project from scratch.
+    Builds stand-alone portable all-in-one python folder for Windows with the project from scratch.
     """

     # Constants

@@ -462,23 +462,38 @@ pause
 """)


-def build_deepfacelive_windows(release_dir, cache_dir, python_ver='3.7.9'):
+def build_deepfacelive_windows(release_dir, cache_dir, python_ver='3.7.9', backend='cuda'):

     builder = WindowsFolderBuilder(release_path=Path(release_dir),
                                    cache_path=Path(cache_dir),
                                    python_ver=python_ver,
                                    clear_release_path=True)
-    builder.install_pip_package('numpy==1.21.1')
+
+    # PIP INSTALLATIONS
+    builder.install_pip_package('numpy==1.21.2')
     builder.install_pip_package('scipy==1.5.4')
     builder.install_pip_package('numexpr')
     builder.install_pip_package('opencv-python==4.5.3.56')
     builder.install_pip_package('opencv-contrib-python==4.5.3.56')
     builder.install_pip_package('pyqt6==6.1.1')
-    builder.install_pip_package('torch==1.8.1+cu111 torchvision==0.9.1+cu111 -f https://download.pytorch.org/whl/torch_stable.html')
-    builder.install_pip_package('onnxruntime-gpu==1.8.1')
-    builder.install_pip_package('cupy-cuda111===9.0.0')
+    builder.install_pip_package('onnx==1.10.1')
+
+    if backend == 'cuda':
+        builder.install_pip_package('torch==1.8.1+cu111 torchvision==0.9.1+cu111 -f https://download.pytorch.org/whl/torch_stable.html')
+        builder.install_pip_package('onnxruntime-gpu==1.8.1')
+        builder.install_pip_package('cupy-cuda111===9.0.0')
+    elif backend == 'directml':
+        if python_ver[:3] == '3.7':
+            builder.install_pip_package('https://github.com/iperov/DeepFaceLive/releases/download/ort-dml/onnxruntime_directml-1.8.2-cp37-cp37m-win_amd64.whl')
+        else:
+            raise Exception(f'no onnxruntime_directml wheel for python {python_ver}')

     builder.install_ffmpeg_binaries()

-    print('Moving CUDA dlls from Torch to shared directory')
-    cuda_bin_path = builder.cuda_bin_path
-    torch_lib_path = builder.python_site_packages_path / 'torch' / 'lib'
+    #
+    if backend == 'cuda':
+        print('Moving CUDA dlls from Torch to shared directory')
+        cuda_bin_path = builder.cuda_bin_path
+        torch_lib_path = builder.python_site_packages_path / 'torch' / 'lib'

@@ -511,8 +526,13 @@ def build_deepfacelive_windows(release_dir, cache_dir, python_ver='3.7.9'):
     print('Copying samples.')
     shutil.copytree( str(Path(__file__).parent.parent / 'samples'), str(userdata_path / 'samples') )

-    builder.create_run_python_script('DeepFaceLive.bat', 'DeepFaceLive\\main.py', 'run DeepFaceLive --userdata-dir=%~dp0userdata')
-    builder.create_internal_run_python_script('build DeepFaceLive.bat', 'DeepFaceLive\\build\\windows\\WindowsBuilder.py', '--build-type dfl-windows --release-dir Builds\DeepFaceLive --cache-dir _cache' )
+    if backend == 'cuda':
+        builder.create_run_python_script('DeepFaceLive.bat', 'DeepFaceLive\\main.py', 'run DeepFaceLive --userdata-dir=%~dp0userdata')
+    elif backend == 'directml':
+        builder.create_run_python_script('DeepFaceLive.bat', 'DeepFaceLive\\main.py', 'run DeepFaceLive --userdata-dir=%~dp0userdata --no-cuda')
+
+    builder.create_internal_run_python_script('build DeepFaceLive CUDA.bat', 'DeepFaceLive\\build\\windows\\WindowsBuilder.py', '--build-type dfl-windows --release-dir Builds\DeepFaceLive --cache-dir _cache --backend cuda')
+    builder.create_internal_run_python_script('build DeepFaceLive DirectML.bat', 'DeepFaceLive\\build\\windows\\WindowsBuilder.py', '--build-type dfl-windows --release-dir Builds\DeepFaceLive --cache-dir _cache --backend directml')

     builder.run_python('main.py dev merge_large_files --delete-parts', cwd=deepfacelive_path)

@@ -531,12 +551,15 @@ if __name__ == '__main__':
     p.add_argument('--release-dir', action=fixPathAction, default=None)
     p.add_argument('--cache-dir', action=fixPathAction, default=None)
     p.add_argument('--python-ver', default="3.7.9")
+    p.add_argument('--backend', choices=['cuda', 'directml'], default='cuda')

     args = p.parse_args()

     if args.build_type == 'dfl-windows':
         build_deepfacelive_windows(release_dir=args.release_dir,
                                    cache_dir=args.cache_dir,
-                                   python_ver=args.python_ver)
+                                   python_ver=args.python_ver,
+                                   backend=args.backend)
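The new backend parameter decides which inference stack gets baked into the portable build: 'cuda' pulls in torch, onnxruntime-gpu and cupy, while 'directml' installs the prebuilt onnxruntime-directml wheel and emits a launcher that passes --no-cuda. A hedged sketch of driving the updated entry point for both variants follows; the output directories and the import path are assumptions, not part of the commit.

# Hypothetical driver for the updated builder entry point; the output directories
# and the import path are assumptions, not part of the commit.
from WindowsBuilder import build_deepfacelive_windows  # assumed to be importable

for backend, out_dir in (('cuda',     'Builds/DeepFaceLive_CUDA'),
                         ('directml', 'Builds/DeepFaceLive_DirectML')):
    build_deepfacelive_windows(release_dir=out_dir,
                               cache_dir='_cache',
                               python_ver='3.7.9',   # only 3.7 has an onnxruntime-directml wheel in this commit
                               backend=backend)

Equivalently, the two generated "build DeepFaceLive *.bat" scripts above invoke WindowsBuilder.py with --backend cuda or --backend directml.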
main.py (11 changed lines)

@@ -1,5 +1,9 @@
+import argparse
 import os
 import platform
+from pathlib import Path
+
+from xlib import appargs as lib_appargs

 # onnxruntime==1.8.0 requires CUDA_PATH_V11_2, but 1.8.1 don't
 # keep the code if they return that behaviour

@@ -10,9 +14,6 @@ import platform
 # # set environ for onnxruntime
 # # os.environ['CUDA_PATH_V11_2'] = os.environ['CUDA_PATH']

-import argparse
-from pathlib import Path
-
 def main():
     parser = argparse.ArgumentParser()
     subparsers = parser.add_subparsers()

@@ -22,15 +23,17 @@ def main():

     def run_DeepFaceLive(args):
         userdata_path = Path(args.userdata_dir)
+        lib_appargs.set_arg_bool('NO_CUDA', args.no_cuda)

         print('Running DeepFaceLive.')
         from apps.DeepFaceLive.DeepFaceLiveApp import DeepFaceLiveApp
         DeepFaceLiveApp(userdata_path=userdata_path).run()

     p = run_subparsers.add_parser('DeepFaceLive')
     p.add_argument('--userdata-dir', default=None, action=fixPathAction, help="Workspace directory.")
+    p.add_argument('--no-cuda', action="store_true", default=False, help="Disable CUDA.")
     p.set_defaults(func=run_DeepFaceLive)


     dev_parser = subparsers.add_parser("dev")
     dev_subparsers = dev_parser.add_subparsers()
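--no-cuda is recorded once via lib_appargs.set_arg_bool('NO_CUDA', ...) and later read by the CuPy and ONNX Runtime device modules, so the backend choice does not have to be threaded through every call site. The xlib.appargs implementation itself is not part of this diff; a minimal sketch of a process-wide flag store with the same call surface might look like this (only the two function names come from the diff, the body is an assumption).

# Minimal sketch of a process-wide boolean flag store with the same call surface
# as the xlib.appargs functions used in the diff (set_arg_bool / get_arg_bool).
# The real module may differ; this only illustrates the mechanism.
_args = {}

def set_arg_bool(name: str, value: bool) -> None:
    _args[name] = bool(value)

def get_arg_bool(name: str) -> bool:
    # Unset flags default to False, so CUDA stays enabled unless --no-cuda was given.
    return _args.get(name, False)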
@@ -1,6 +1,6 @@
 from typing import List
-
+from .. import appargs as lib_appargs

 class CuPyDeviceInfo:

@@ -53,7 +53,11 @@ def get_available_devices() -> List[CuPyDeviceInfo]:
     """
     returns a list of available CuPyDeviceInfo
     """
+    if lib_appargs.get_arg_bool('NO_CUDA'):
+        return []
+
     global _cupy_devices

     if _cupy_devices is None:
         import cupy as cp # BUG eats 1.8Gb paging file per process, so import on demand
         devices = []
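With the NO_CUDA guard in place, a DirectML build never reaches the import cupy line, which avoids both the paging-file cost called out in the comment and a hard dependency on cupy being installed. A self-contained sketch of the same guard-plus-lazy-import pattern follows; the flag dict stands in for lib_appargs and the cupy call is only illustrative.

from typing import List

_flags = {'NO_CUDA': True}          # stands in for lib_appargs in this sketch

_cupy_devices = None

def get_available_devices() -> List[str]:
    # Mirrors the guarded enumeration from the hunk above, in simplified form.
    global _cupy_devices
    if _flags.get('NO_CUDA', False):
        return []                   # skip enumeration entirely; cupy is never imported
    if _cupy_devices is None:
        import cupy as cp           # heavy import happens only on demand
        _cupy_devices = [f'cuda:{i}' for i in range(cp.cuda.runtime.getDeviceCount())]
    return _cupy_devices

print(get_available_devices())      # -> [] while NO_CUDA is set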
@@ -18,21 +18,18 @@ def InferenceSession_with_device(onnx_model_or_path, device_info : ORTDeviceInfo
         onnx.save(onnx_model_or_path, b)
         onnx_model_or_path = b.getvalue()

-    prs = rt.get_available_providers()
+    device_ep = device_info.get_execution_provider()
+    if device_ep not in rt.get_available_providers():
+        raise Exception(f'{device_ep} is not avaiable in onnxruntime')

-    if device_info.is_cpu():
-        if 'CPUExecutionProvider' not in prs:
-            raise Exception('CPUExecutionProvider is not avaiable in onnxruntime')
-        providers = ['CPUExecutionProvider']
-    else:
-        if 'CUDAExecutionProvider' not in prs:
-            raise Exception('CUDAExecutionProvider is not avaiable in onnxruntime')
-        providers = [ ('CUDAExecutionProvider', {'device_id': device_info.get_index() }) ]
-        #providers = [ ('DmlExecutionProvider', {'device_id': 1 }) ]
+    ep_flags = {}
+    if device_ep in ['CUDAExecutionProvider','DmlExecutionProvider']:
+        ep_flags['device_id'] = device_info.get_index()

     sess_options = rt.SessionOptions()
-    #sess_options.enable_mem_pattern = False #for DmlExecutionProvider
     sess_options.log_severity_level = 4
     sess_options.log_verbosity_level = -1
-    sess = rt.InferenceSession(onnx_model_or_path, providers=providers, sess_options=sess_options)
+    if device_ep == 'DmlExecutionProvider':
+        sess_options.enable_mem_pattern = False
+    sess = rt.InferenceSession(onnx_model_or_path, providers=[ (device_ep, ep_flags) ], sess_options=sess_options)
     return sess
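The rewritten InferenceSession_with_device no longer special-cases CPU versus CUDA; it takes whatever execution provider the device info names, checks that the provider exists in the installed onnxruntime, and only adjusts SessionOptions for DirectML, whose provider does not work with ONNX Runtime's memory-pattern optimization. A stripped-down sketch of the same construction follows; the function name and the model path are placeholders.

# Stripped-down version of the provider selection shown above.
# 'model.onnx' and the default arguments are placeholders.
import onnxruntime as rt

def make_session(model_path: str, device_ep: str = 'DmlExecutionProvider', device_id: int = 0):
    if device_ep not in rt.get_available_providers():
        raise RuntimeError(f'{device_ep} is not available in this onnxruntime build')

    ep_flags = {}
    if device_ep in ('CUDAExecutionProvider', 'DmlExecutionProvider'):
        ep_flags['device_id'] = device_id      # selects the GPU / DX12 adapter

    so = rt.SessionOptions()
    so.log_severity_level = 4
    if device_ep == 'DmlExecutionProvider':
        so.enable_mem_pattern = False          # DML EP does not support memory pattern optimization

    return rt.InferenceSession(model_path, providers=[(device_ep, ep_flags)], sess_options=so)

# sess = make_session('model.onnx')           # placeholder model path

Passing providers as (name, options) tuples is the same form the diff uses, so device_id picks the adapter for both CUDAExecutionProvider and DmlExecutionProvider.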
@@ -1,19 +1,24 @@
 import ctypes
+import itertools
 import os
 from typing import List

+import onnxruntime as rt
+
+from .. import appargs as lib_appargs
+

 class ORTDeviceInfo:
     """
     Represents picklable ONNXRuntime device info
     """

-    def __init__(self, index=None, name=None, total_memory=None, free_memory=None, compute_capability=None):
+    def __init__(self, index=None, execution_provider=None, name=None, total_memory=None, free_memory=None):
         self._index : int = index
+        self._execution_provider : str = execution_provider
         self._name : str = name
         self._total_memory : int = total_memory
         self._free_memory : int = free_memory
-        self._compute_capability : int = compute_capability

     def __getstate__(self):
         return self.__dict__.copy()

@@ -27,8 +32,8 @@ class ORTDeviceInfo:
     def get_index(self) -> int:
         return self._index

-    def get_compute_capability(self) -> int:
-        return self._compute_capability
+    def get_execution_provider(self) -> str:
+        return self._execution_provider

     def get_name(self) -> str:
         return self._name

@@ -51,90 +56,19 @@ class ORTDeviceInfo:
         if self.is_cpu():
             return f"CPU"
         else:
-            return f"[{self._index}] {self._name} [{(self._total_memory / 1024**3) :.3}Gb]"
+            ep = self.get_execution_provider()
+            if ep == 'CUDAExecutionProvider':
+                return f"[{self._index}] {self._name} [{(self._total_memory / 1024**3) :.3}Gb] [CUDA]"
+            elif ep == 'DmlExecutionProvider':
+                return f"[{self._index}] {self._name} [{(self._total_memory / 1024**3) :.3}Gb] [DirectX12]"

     def __repr__(self):
         return f'{self.__class__.__name__} object: ' + self.__str__()


-# class ORTDevicesInfo:
-#     """
-#     a list of ORTDeviceInfo
-#     """
-
-#     def __init__(self, devices : List[ORTDeviceInfo] = None):
-#         if devices is None:
-#             devices = []
-#         self._devices = devices
-
-#     def __getstate__(self):
-#         return self.__dict__.copy()
-
-#     def __setstate__(self, d):
-#         self.__init__()
-#         self.__dict__.update(d)
-
-#     def add(self, device_or_devices : ORTDeviceInfo):
-#         if isinstance(device_or_devices, ORTDeviceInfo):
-#             if device_or_devices not in self._devices:
-#                 self._devices.append(device_or_devices)
-#         elif isinstance(device_or_devices, ORTDevicesInfo):
-#             for device in device_or_devices:
-#                 self.add(device)
-
-#     def copy(self):
-#         return copy.deepcopy(self)
-
-#     def get_count(self): return len(self._devices)
-
-#     def get_highest_total_memory_device(self) -> ORTDeviceInfo:
-#         """
-#         returns ORTDeviceInfo with highest available memory, if devices support total_memory parameter
-#         """
-#         result = None
-#         idx_mem = 0
-#         for device in self._devices:
-#             mem = device.get_total_memory()
-#             if result is None or (mem is not None and mem > idx_mem):
-#                 result = device
-#                 idx_mem = mem
-#         return result
-
-#     def get_lowest_total_memory_device(self) -> ORTDeviceInfo:
-#         """
-#         returns ORTDeviceInfo with lowest available memory, if devices support total_memory parameter
-#         """
-#         result = None
-#         idx_mem = sys.maxsize
-#         for device in self._devices:
-#             mem = device.get_total_memory()
-#             if result is None or (mem is not None and mem < idx_mem):
-#                 result = device
-#                 idx_mem = mem
-#         return result
-
-#     def __len__(self):
-#         return len(self._devices)
-
-#     def __getitem__(self, key):
-#         result = self._devices[key]
-#         if isinstance(key, slice):
-#             return self.__class__(result)
-#         return result
-
-#     def __iter__(self):
-#         for device in self._devices:
-#             yield device
-
-#     def __str__(self): return f'{self.__class__.__name__}:[' + ', '.join([ device.__str__() for device in self._devices ]) + ']'
-#     def __repr__(self): return f'{self.__class__.__name__}:[' + ', '.join([ device.__repr__() for device in self._devices ]) + ']'


 _ort_devices_info = None

 def get_cpu_device() -> ORTDeviceInfo:
-    return ORTDeviceInfo(index=-1, name='CPU', total_memory=0, free_memory=0, compute_capability=0)
+    return ORTDeviceInfo(index=-1, execution_provider='CPUExecutionProvider', name='CPU', total_memory=0, free_memory=0)

 def get_available_devices_info(include_cpu=True, cpu_only=False) -> List[ORTDeviceInfo]:
     """

@@ -145,12 +79,13 @@ def get_available_devices_info(include_cpu=True, cpu_only=False) -> List[ORTDeviceInfo]:
     _initialize_ort_devices()
     devices = []
     if not cpu_only:
-        for i in range ( int(os.environ['ORT_DEVICES_COUNT']) ):
-            devices.append ( ORTDeviceInfo(index=i,
+        for i in range ( int(os.environ.get('ORT_DEVICES_COUNT',0)) ):
+            devices.append ( ORTDeviceInfo(index=int(os.environ[f'ORT_DEVICE_{i}_INDEX']),
+                                           execution_provider=os.environ[f'ORT_DEVICE_{i}_EP'],
                                            name=os.environ[f'ORT_DEVICE_{i}_NAME'],
                                            total_memory=int(os.environ[f'ORT_DEVICE_{i}_TOTAL_MEM']),
                                            free_memory=int(os.environ[f'ORT_DEVICE_{i}_FREE_MEM']),
-                                           compute_capability=int(os.environ[f'ORT_DEVICE_{i}_CC']) ))
+                                           ) )
     if include_cpu or cpu_only:
         devices.append(get_cpu_device())
     _ort_devices_info = devices

@@ -168,6 +103,10 @@ def _initialize_ort_devices():
     if int(os.environ.get('ORT_DEVICES_INITIALIZED', 0)) == 0:
         os.environ['ORT_DEVICES_INITIALIZED'] = '1'
         os.environ['ORT_DEVICES_COUNT'] = '0'
-        os.environ['CUDA_CACHE_MAXSIZE'] = '2147483647'
-        try:
-            libnames = ('libcuda.so', 'libcuda.dylib', 'nvcuda.dll')
+
+        devices = []
+        prs = rt.get_available_providers()
+        if not lib_appargs.get_arg_bool('NO_CUDA') and 'CUDAExecutionProvider' in prs:
+            os.environ['CUDA_CACHE_MAXSIZE'] = '2147483647'
+            try:
+                libnames = ('libcuda.so', 'libcuda.dylib', 'nvcuda.dll')

@@ -189,7 +128,7 @@ def _initialize_ort_devices():
                 totalMem = ctypes.c_size_t()
                 device = ctypes.c_int()
                 context = ctypes.c_void_p()
-                devices = []

                 if cuda.cuInit(0) == 0 and \
                    cuda.cuDeviceGetCount(ctypes.byref(nGpus)) == 0:

@@ -202,21 +141,47 @@ def _initialize_ort_devices():
                         if cuda.cuCtxCreate_v2(ctypes.byref(context), 0, device) == 0:
                             if cuda.cuMemGetInfo_v2(ctypes.byref(freeMem), ctypes.byref(totalMem)) == 0:
                                 cc = cc_major.value * 10 + cc_minor.value
-                                devices.append ({'name' : name.split(b'\0', 1)[0].decode(),
+                                devices.append ({'index' : i,
+                                                 'execution_provider' : 'CUDAExecutionProvider',
+                                                 'name' : name.split(b'\0', 1)[0].decode(),
                                                  'total_mem' : totalMem.value,
                                                  'free_mem' : freeMem.value,
-                                                 'cc' : cc
                                                  })
                             cuda.cuCtxDetach(context)
             except Exception as e:
                 print(f'CUDA devices initialization error: {e}')
-                devices = []
+
+        if 'DmlExecutionProvider' in prs:
+            # onnxruntime-directml has no device enumeration API for users. Thus the code must follow the same logic
+            # as here https://github.com/microsoft/onnxruntime/blob/master/onnxruntime/core/providers/dml/dml_provider_factory.cc
+
+            from xlib.api.win32 import dxgi as lib_dxgi
+
+            dxgi_factory = lib_dxgi.create_DXGIFactory4()
+            if dxgi_factory is not None:
+                for i in itertools.count():
+                    adapter = dxgi_factory.enum_adapters1(i)
+                    if adapter is not None:
+                        desc = adapter.get_desc1()
+                        if desc.Flags != lib_dxgi.DXGI_ADAPTER_FLAG.DXGI_ADAPTER_FLAG_SOFTWARE and \
+                           not (desc.VendorId == 0x1414 and desc.DeviceId == 0x8c):
+                            devices.append ({'index' : i,
+                                             'execution_provider' : 'DmlExecutionProvider',
+                                             'name' : desc.Description,
+                                             'total_mem' : desc.DedicatedVideoMemory,
+                                             'free_mem' : desc.DedicatedVideoMemory,
+                                             })
+                        adapter.Release()
+                    else:
+                        break
+                dxgi_factory.Release()

         os.environ['ORT_DEVICES_COUNT'] = str(len(devices))
         for i, device in enumerate(devices):
+            os.environ[f'ORT_DEVICE_{i}_INDEX'] = str(device['index'])
+            os.environ[f'ORT_DEVICE_{i}_EP'] = device['execution_provider']
             os.environ[f'ORT_DEVICE_{i}_NAME'] = device['name']
             os.environ[f'ORT_DEVICE_{i}_TOTAL_MEM'] = str(device['total_mem'])
             os.environ[f'ORT_DEVICE_{i}_FREE_MEM'] = str(device['free_mem'])
-            os.environ[f'ORT_DEVICE_{i}_CC'] = str(device['cc'])

 _initialize_ort_devices()
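_initialize_ort_devices() now enumerates CUDA devices through the driver API and DirectX 12 adapters through DXGI (skipping software adapters and the Microsoft Basic Render Driver, vendor 0x1414 / device 0x8c), then publishes the results as ORT_DEVICE_{i}_* environment variables so that worker processes can rebuild the device list without importing onnxruntime or touching CUDA/DXGI themselves. A condensed sketch of that round trip, using the same variable names as the diff, follows; the sample device entry is made up.

import os
from typing import List, Dict

def publish_devices(devices: List[Dict]) -> None:
    # Same environment-variable scheme as _initialize_ort_devices() above.
    os.environ['ORT_DEVICES_COUNT'] = str(len(devices))
    for i, dev in enumerate(devices):
        os.environ[f'ORT_DEVICE_{i}_INDEX']     = str(dev['index'])
        os.environ[f'ORT_DEVICE_{i}_EP']        = dev['execution_provider']
        os.environ[f'ORT_DEVICE_{i}_NAME']      = dev['name']
        os.environ[f'ORT_DEVICE_{i}_TOTAL_MEM'] = str(dev['total_mem'])
        os.environ[f'ORT_DEVICE_{i}_FREE_MEM']  = str(dev['free_mem'])

def read_devices() -> List[Dict]:
    # Inverse operation, as done by get_available_devices_info() in a worker process.
    out = []
    for i in range(int(os.environ.get('ORT_DEVICES_COUNT', 0))):
        out.append({'index': int(os.environ[f'ORT_DEVICE_{i}_INDEX']),
                    'execution_provider': os.environ[f'ORT_DEVICE_{i}_EP'],
                    'name': os.environ[f'ORT_DEVICE_{i}_NAME'],
                    'total_mem': int(os.environ[f'ORT_DEVICE_{i}_TOTAL_MEM']),
                    'free_mem': int(os.environ[f'ORT_DEVICE_{i}_FREE_MEM'])})
    return out

# Illustrative device entry (values are made up):
publish_devices([{'index': 0, 'execution_provider': 'DmlExecutionProvider',
                  'name': 'Example DX12 GPU', 'total_mem': 8 * 1024**3, 'free_mem': 8 * 1024**3}])
print(read_devices())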