scripts/vsmlrt.py: add `prefer_nhwc` flag to the ort_cuda backend #276

Workflow file for this run

.github/workflows/windows-ort.yml at 0abb2a3

	name: Build (Windows-ORT)

	on:
	push:
	paths:
	- 'common/**'
	- 'vsort/**'
	- '.github/workflows/windows-ort.yml'
	workflow_call:
	inputs:
	tag:
	description: 'which tag to upload to'
	required: true
	type: string
	workflow_dispatch:
	inputs:
	tag:
	description: 'which tag to upload to'
	default: ''

	jobs:
	build-windows:
	runs-on: windows-2022

	defaults:
	run:
	shell: cmd
	working-directory: vsort

	steps:
	- name: Checkout repo
	uses: actions/checkout@v4
	with:
	fetch-depth: 0

	- name: Setup MSVC
	uses: ilammy/msvc-dev-cmd@v1

	- name: Setup Ninja
	run: pip install ninja

	- name: Cache protobuf
	id: cache-protobuf
	uses: actions/cache@v4
	with:
	path: vsort/protobuf/install
	key: ${{ runner.os }}-vsort-protobuf-v4

	- name: Checkout protobuf
	uses: actions/checkout@v4
	if: steps.cache-protobuf.outputs.cache-hit != 'true'
	with:
	repository: protocolbuffers/protobuf
	# follows protobuf in https://github.com/AmusementClub/onnxruntime/blob/master/cmake/external/onnxruntime_external_deps.cmake#L203
	# if you change this, remember to bump the version of the cache key.
	ref: v3.21.12
	fetch-depth: 1
	path: vsort/protobuf

	- name: Configure protobuf
	if: steps.cache-protobuf.outputs.cache-hit != 'true'
	run: cmake -S protobuf -B protobuf\build_rel -G Ninja -LA
	-D CMAKE_BUILD_TYPE=Release
	-D protobuf_BUILD_SHARED_LIBS=OFF -D protobuf_BUILD_TESTS=OFF

	- name: Build protobuf
	if: steps.cache-protobuf.outputs.cache-hit != 'true'
	run: cmake --build protobuf\build_rel --verbose

	- name: Install protobuf
	if: steps.cache-protobuf.outputs.cache-hit != 'true'
	run: cmake --install protobuf\build_rel --prefix protobuf\install

	- name: Cache onnx
	id: cache-onnx
	uses: actions/cache@v4
	with:
	path: vsort/onnx/install
	key: ${{ runner.os }}-vsort-onnx-v5

	- name: Checkout onnx
	if: steps.cache-onnx.outputs.cache-hit != 'true'
	uses: actions/checkout@v4
	with:
	repository: onnx/onnx
	# follows onnx in https://github.com/AmusementClub/onnxruntime/tree/master/cmake/external
	# if you change this, remember to bump the version of the cache key.
	ref: 990217f043af7222348ca8f0301e17fa7b841781
	fetch-depth: 1
	path: vsort/onnx

	- name: Configure onnx
	if: steps.cache-onnx.outputs.cache-hit != 'true'
	run: cmake -S onnx -B onnx\build -G Ninja -LA
	-D CMAKE_BUILD_TYPE=Release
	-D Protobuf_PROTOC_EXECUTABLE=protobuf\install\bin\protoc
	-D Protobuf_LITE_LIBRARY=protobuf\install\lib
	-D Protobuf_LIBRARIES=protobuf\install\lib
	-D ONNX_USE_LITE_PROTO=ON -D ONNX_USE_PROTOBUF_SHARED_LIBS=OFF
	-D ONNX_GEN_PB_TYPE_STUBS=OFF -D ONNX_ML=0
	-D ONNX_USE_MSVC_STATIC_RUNTIME=1

	- name: Build onnx
	if: steps.cache-onnx.outputs.cache-hit != 'true'
	run: cmake --build onnx\build --verbose

	- name: Install onnx
	if: steps.cache-onnx.outputs.cache-hit != 'true'
	run: cmake --install onnx\build --prefix onnx\install

	- name: Download VapourSynth headers
	run: \|
	curl -s -o vs.zip -L https://github.com/vapoursynth/vapoursynth/archive/refs/tags/R54.zip
	unzip -q vs.zip
	mv vapoursynth-*/ vapoursynth/

	- name: Download ONNX Runtime Precompilation
	run: \|
	curl -s -o ortgpu.zip -LJO https://github.com/AmusementClub/onnxruntime/releases/download/orttraining_rc2-7983-g9001c69b84-240419-0832/onnxruntime-gpu-win64.zip
	unzip -q ortgpu.zip

	- name: Cache CUDA
	id: cache-cuda
	uses: actions/cache@v4
	with:
	path: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA
	key: ${{ runner.os }}-cuda-12.4.0

	- name: Setup CUDA
	if: steps.cache-cuda.outputs.cache-hit != 'true'
	run: \|
	curl -s -o cuda_installer.exe -L https://developer.download.nvidia.com/compute/cuda/12.4.0/network_installers/cuda_12.4.0_windows_network.exe
	cuda_installer.exe -s nvcc_12.4 cudart_12.4

	- name: Configure
	run: cmake -S . -B build -G Ninja -LA
	-D CMAKE_BUILD_TYPE=Release
	-D CMAKE_MSVC_RUNTIME_LIBRARY=MultiThreaded
	-D VAPOURSYNTH_INCLUDE_DIRECTORY=vapoursynth\include
	-D protobuf_DIR=protobuf\install\cmake
	-D ONNX_DIR=onnx\install\lib\cmake\ONNX
	-D ONNX_RUNTIME_API_DIRECTORY=onnxruntime-gpu\include\onnxruntime
	-D ONNX_RUNTIME_LIB_DIRECTORY=onnxruntime-gpu\lib
	-D ENABLE_CUDA=1
	-D CUDAToolkit_ROOT="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4"
	-D ENABLE_DML=1
	-D CMAKE_CXX_STANDARD=20

	- name: Build
	run: cmake --build build --verbose

	- name: Install
	run: \|
	cmake --install build --prefix install
	mkdir artifact
	mkdir artifact\vsort
	copy install\bin\vsort.dll artifact\
	copy onnxruntime-gpu\bin\*.dll artifact\vsort\
	copy onnxruntime-gpu\lib\*.dll artifact\vsort\

	- name: Download DirectML Library
	# follows DirectML in https://github.com/AmusementClub/onnxruntime/blob/master/cmake/external/dml.cmake#L44
	run: \|
	curl -s -o directml.nupkg -LJO https://www.nuget.org/api/v2/package/Microsoft.AI.DirectML/1.14.1
	unzip -q directml.nupkg -d dml
	copy dml\bin\x64-win\DirectML.dll artifact\vsort\

	- name: Upload
	uses: actions/upload-artifact@v4
	with:
	name: VSORT-Windows-x64
	path: vsort/artifact

	- name: Setup Python portable
	run: \|
	curl -s -o python.zip -LJO https://www.python.org/ftp/python/3.9.10/python-3.9.10-embed-amd64.zip
	7z x python.zip -ovs_portable

	- name: Install VapourSynth portable
	run: \|
	curl -s -o vs.7z -LJO https://github.com/vapoursynth/vapoursynth/releases/download/R54/VapourSynth64-Portable-R54.7z
	7z x vs.7z -ovs_portable -y

	- name: Copy plugin
	run: \|
	copy artifact\*.dll vs_portable\vapoursynth64\plugins
	mkdir vs_portable\vapoursynth64\plugins\vsort\
	copy artifact\vsort\*.dll vs_portable\vapoursynth64\plugins\vsort\

	- name: Install waifu2x model
	run: \|
	curl -s -o waifu2x.7z -LJO https://github.com/AmusementClub/vs-mlrt/releases/download/model-20211209/waifu2x_v3.7z
	7z x waifu2x.7z -ovs_portable\vapoursynth64\plugins\models

	- name: Download x265
	run: \|
	curl -s -o x265.7z -LJO https://github.com/AmusementClub/x265/releases/download/Yuuki-3.5-AC3/x265-win64-x86-64-clang.Yuuki-3.5-AC3.7z
	7z x x265.7z -ovs_portable\

	- name: Create script
	shell: bash
	run: echo "import vapoursynth as vs;from vapoursynth import core;import sys;print(core.ort, file=sys.stderr);print(core.ort.Version(),file=sys.stderr);core.std.BlankClip(format=vs.RGBS).ort.Model(r\"waifu2x\\upconv_7_anime_style_art_rgb\\scale2.0x_model.onnx\", builtin=True).resize.Bicubic(format=vs.YUV420P10, matrix_s='709').set_output()" > test.vpy

	- name: Run vspipe
	shell: bash
	run: \|
	set -ex
	vs_portable/vspipe -i test.vpy -
	vs_portable/vspipe --y4m -p -e 9 test.vpy - \| vs_portable/x265 --log-file x265.log --log-file-level info --y4m -D 10 --preset ultrafast -o out.hevc -
	ls -l out.hevc x265.log
	cat x265.log
	grep -F 'encoded 10 frames' x265.log \|\| exit 2
	grep -i 'error' x265.log && exit 1
	exit 0

	- name: Create script (fp16)
	shell: bash
	run: echo "import vapoursynth as vs;from vapoursynth import core;import sys;print(core.ort, file=sys.stderr);core.std.BlankClip(format=vs.RGBS).ort.Model(r\"waifu2x\\upconv_7_anime_style_art_rgb\\scale2.0x_model.onnx\", builtin=True, fp16=True).resize.Bicubic(format=vs.YUV420P10, matrix_s='709').set_output()" > test_fp16.vpy

	- name: Run vspipe (fp16)
	shell: bash
	run: \|
	set -ex
	vs_portable/vspipe -i test_fp16.vpy -
	vs_portable/vspipe --y4m -p -e 9 test_fp16.vpy - \| vs_portable/x265 --log-file x265.log --log-file-level info --y4m -D 10 --preset ultrafast -o out.hevc -
	ls -l out.hevc x265.log
	cat x265.log
	grep -F 'encoded 10 frames' x265.log \|\| exit 2
	grep -i 'error' x265.log && exit 1
	exit 0

	- name: Create script (fp16 input)
	shell: bash
	run: echo "import vapoursynth as vs;from vapoursynth import core;import sys;print(core.ort, file=sys.stderr);flt=core.std.BlankClip(format=vs.RGBH).ort.Model(r\"waifu2x\\upconv_7_anime_style_art_rgb\\scale2.0x_model.onnx\", builtin=True, fp16=True);print(flt,file=sys.stderr);flt.resize.Bicubic(format=vs.YUV420P10, matrix_s='709').set_output()" > test_fp16_input.vpy

	- name: Run vspipe (fp16 input)
	shell: bash
	run: \|
	set -ex
	vs_portable/vspipe -i test_fp16_input.vpy -
	vs_portable/vspipe --y4m -p -e 9 test_fp16_input.vpy - \| vs_portable/x265 --log-file x265.log --log-file-level info --y4m -D 10 --preset ultrafast -o out.hevc -
	ls -l out.hevc x265.log
	cat x265.log
	grep -F 'encoded 10 frames' x265.log \|\| exit 2
	grep -i 'error' x265.log && exit 1
	exit 0

	- name: Create script (fp16 output)
	shell: bash
	run: echo "import vapoursynth as vs;from vapoursynth import core;import sys;print(core.ort, file=sys.stderr);flt=core.std.BlankClip(format=vs.RGBS).ort.Model(r\"waifu2x\\upconv_7_anime_style_art_rgb\\scale2.0x_model.onnx\", builtin=True, fp16=True, output_format=1);print(flt,file=sys.stderr);flt.resize.Bicubic(format=vs.YUV420P10, matrix_s='709').set_output()" > test_fp16_output.vpy

	- name: Run vspipe (fp16 output)
	shell: bash
	run: \|
	set -ex
	vs_portable/vspipe -i test_fp16_output.vpy -
	vs_portable/vspipe --y4m -p -e 9 test_fp16_output.vpy - \| vs_portable/x265 --log-file x265.log --log-file-level info --y4m -D 10 --preset ultrafast -o out.hevc -
	ls -l out.hevc x265.log
	cat x265.log
	grep -F 'encoded 10 frames' x265.log \|\| exit 2
	grep -i 'error' x265.log && exit 1
	exit 0

	- name: Describe
	run: git describe --tags --long

	- name: Compress artifact for release
	if: github.event_name == 'workflow_dispatch' && github.event.inputs.tag != ''
	run: \|
	cd artifact
	7z a -t7z -mx=7 ../../VSORT-Windows-x64.${{ github.event.inputs.tag }}.7z .

	- name: Release
	uses: softprops/action-gh-release@v1
	if: github.event_name == 'workflow_dispatch' && github.event.inputs.tag != ''
	with:
	tag_name: ${{ inputs.tag }}
	files: VSORT-Windows-x64.${{ github.event.inputs.tag }}.7z
	fail_on_unmatched_files: true
	generate_release_notes: false
	prerelease: true

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

scripts/vsmlrt.py: add `prefer_nhwc` flag to the ort_cuda backend #276

Workflow file

scripts/vsmlrt.py: add `prefer_nhwc` flag to the ort_cuda backend #276

Jobs

Run details

Workflow file for this run

scripts/vsmlrt.py: add `prefer_nhwc` flag to the ort_cuda backend #276

Workflow file

Workflow file for this run

scripts/vsmlrt.py: add `prefer_nhwc` flag to the ort_cuda backend #276