From 8c651b08905cae160a3751c5165cd2f572a7677c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Siudut?= Date: Wed, 22 Apr 2020 09:57:18 +0100 Subject: [PATCH] Add VAAPI support to Qt GUI (Fix #26) (#205) --- gui/include/avopenglwidget.h | 27 +++++++- gui/include/settings.h | 4 ++ gui/include/settingsdialog.h | 2 + gui/include/streamsession.h | 1 + gui/include/videodecoder.h | 25 ++++++- gui/src/avopenglwidget.cpp | 124 ++++++++++++++++++++++++++--------- gui/src/settings.cpp | 19 ++++++ gui/src/settingsdialog.cpp | 29 ++++++++ gui/src/streamsession.cpp | 3 +- gui/src/videodecoder.cpp | 73 +++++++++++++++++++-- scripts/build-ffmpeg.sh | 2 +- 11 files changed, 269 insertions(+), 40 deletions(-) diff --git a/gui/include/avopenglwidget.h b/gui/include/avopenglwidget.h index 6451d8f..312f16c 100644 --- a/gui/include/avopenglwidget.h +++ b/gui/include/avopenglwidget.h @@ -28,15 +28,36 @@ extern "C" #include } +#define MAX_PANES 3 + class VideoDecoder; class AVOpenGLFrameUploader; +struct PlaneConfig +{ + unsigned int width_divider; + unsigned int height_divider; + unsigned int data_per_pixel; + GLint internal_format; + GLenum format; +}; + +struct ConversionConfig +{ + enum AVPixelFormat pixel_format; + const char *shader_vert_glsl; + const char *shader_frag_glsl; + unsigned int planes; + struct PlaneConfig plane_configs[MAX_PANES]; +}; + struct AVOpenGLFrame { - GLuint pbo[3]; - GLuint tex[3]; + GLuint pbo[MAX_PANES]; + GLuint tex[MAX_PANES]; unsigned int width; unsigned int height; + ConversionConfig *conversion_config; bool Update(AVFrame *frame, ChiakiLog *log); }; @@ -61,6 +82,8 @@ class AVOpenGLWidget: public QOpenGLWidget QTimer *mouse_timer; + ConversionConfig *conversion_config; + public: static QSurfaceFormat CreateSurfaceFormat(); diff --git a/gui/include/settings.h b/gui/include/settings.h index e15a56a..c52b42f 100644 --- a/gui/include/settings.h +++ b/gui/include/settings.h @@ -21,6 +21,7 @@ #include #include "host.h" +#include "videodecoder.h" #include @@ -76,6 +77,9 @@ class Settings : public QObject unsigned int GetBitrate() const; void SetBitrate(unsigned int bitrate); + HardwareDecodeEngine GetHardwareDecodeEngine() const; + void SetHardwareDecodeEngine(HardwareDecodeEngine enabled); + unsigned int GetAudioBufferSizeDefault() const; /** diff --git a/gui/include/settingsdialog.h b/gui/include/settingsdialog.h index abc5a33..13a40c9 100644 --- a/gui/include/settingsdialog.h +++ b/gui/include/settingsdialog.h @@ -39,6 +39,7 @@ class SettingsDialog : public QDialog QComboBox *fps_combo_box; QLineEdit *bitrate_edit; QLineEdit *audio_buffer_size_edit; + QComboBox *hardware_decode_combo_box; QListWidget *registered_hosts_list_widget; QPushButton *delete_registered_host_button; @@ -52,6 +53,7 @@ class SettingsDialog : public QDialog void FPSSelected(); void BitrateEdited(); void AudioBufferSizeEdited(); + void HardwareDecodeEngineSelected(); void UpdateRegisteredHosts(); void UpdateRegisteredHostsButtons(); diff --git a/gui/include/streamsession.h b/gui/include/streamsession.h index fabb634..15ac4c5 100644 --- a/gui/include/streamsession.h +++ b/gui/include/streamsession.h @@ -49,6 +49,7 @@ class ChiakiException: public Exception struct StreamSessionConnectInfo { QMap key_map; + HardwareDecodeEngine hw_decode_engine; uint32_t log_level_mask; QString log_file; QString host; diff --git a/gui/include/videodecoder.h b/gui/include/videodecoder.h index e71e3c6..84ccf0a 100644 --- a/gui/include/videodecoder.h +++ b/gui/include/videodecoder.h @@ -22,6 +22,7 @@ #include "exception.h" +#include #include #include @@ -32,6 +33,20 @@ extern "C" #include + +typedef enum { + HW_DECODE_NONE = 0, + HW_DECODE_VAAPI = 1, + HW_DECODE_VDPAU = 2, +} HardwareDecodeEngine; + + +static const QMap hardware_decode_engine_names = { + { HW_DECODE_NONE, "none"}, + { HW_DECODE_VAAPI, "vaapi"}, + { HW_DECODE_VDPAU, "vdpau"}, +}; + class VideoDecoderException: public Exception { public: @@ -43,23 +58,31 @@ class VideoDecoder: public QObject Q_OBJECT public: - VideoDecoder(ChiakiLog *log); + VideoDecoder(HardwareDecodeEngine hw_decode_engine, ChiakiLog *log); ~VideoDecoder(); void PushFrame(uint8_t *buf, size_t buf_size); AVFrame *PullFrame(); + AVFrame *GetFromHardware(AVFrame *hw_frame); ChiakiLog *GetChiakiLog() { return log; } + enum AVPixelFormat PixelFormat() { return hw_decode_engine?AV_PIX_FMT_NV12:AV_PIX_FMT_YUV420P; } + signals: void FramesAvailable(); private: + HardwareDecodeEngine hw_decode_engine; + ChiakiLog *log; QMutex mutex; AVCodec *codec; AVCodecContext *codec_context; + + enum AVPixelFormat hw_pix_fmt; + AVBufferRef *hw_device_ctx; }; #endif // CHIAKI_VIDEODECODER_H diff --git a/gui/src/avopenglwidget.cpp b/gui/src/avopenglwidget.cpp index 164836d..11d4b09 100644 --- a/gui/src/avopenglwidget.cpp +++ b/gui/src/avopenglwidget.cpp @@ -43,12 +43,35 @@ void main() } )glsl"; -static const char *shader_frag_glsl = R"glsl( +static const char *yuv420p_shader_frag_glsl = R"glsl( #version 150 core -uniform sampler2D tex_y; -uniform sampler2D tex_u; -uniform sampler2D tex_v; +uniform sampler2D plane1; // Y +uniform sampler2D plane2; // U +uniform sampler2D plane3; // V + +in vec2 uv_var; +out vec4 out_color; + +void main() +{ + vec3 yuv = vec3( + (texture(plane1, uv_var).r - (16.0 / 255.0)) / ((235.0 - 16.0) / 255.0), + (texture(plane2, uv_var).r - (16.0 / 255.0)) / ((240.0 - 16.0) / 255.0) - 0.5, + (texture(plane3, uv_var).r - (16.0 / 255.0)) / ((240.0 - 16.0) / 255.0) - 0.5); + vec3 rgb = mat3( + 1.0, 1.0, 1.0, + 0.0, -0.21482, 2.12798, + 1.28033, -0.38059, 0.0) * yuv; + out_color = vec4(rgb, 1.0); +} +)glsl"; + +static const char *nv12_shader_frag_glsl = R"glsl( +#version 150 core + +uniform sampler2D plane1; // Y +uniform sampler2D plane2; // interlaced UV in vec2 uv_var; @@ -57,9 +80,10 @@ out vec4 out_color; void main() { vec3 yuv = vec3( - (texture(tex_y, uv_var).r - (16.0 / 255.0)) / ((235.0 - 16.0) / 255.0), - (texture(tex_u, uv_var).r - (16.0 / 255.0)) / ((240.0 - 16.0) / 255.0) - 0.5, - (texture(tex_v, uv_var).r - (16.0 / 255.0)) / ((240.0 - 16.0) / 255.0) - 0.5); + (texture(plane1, uv_var).r - (16.0 / 255.0)) / ((235.0 - 16.0) / 255.0), + (texture(plane2, uv_var).r - (16.0 / 255.0)) / ((240.0 - 16.0) / 255.0) - 0.5, + (texture(plane2, uv_var).g - (16.0 / 255.0)) / ((240.0 - 16.0) / 255.0) - 0.5 + ); vec3 rgb = mat3( 1.0, 1.0, 1.0, 0.0, -0.21482, 2.12798, @@ -68,6 +92,30 @@ void main() } )glsl"; +ConversionConfig conversion_configs[] = { + { + AV_PIX_FMT_YUV420P, + shader_vert_glsl, + yuv420p_shader_frag_glsl, + 3, + { + { 1, 1, 1, GL_R8, GL_RED }, + { 2, 2, 1, GL_R8, GL_RED }, + { 2, 2, 1, GL_R8, GL_RED } + } + }, + { + AV_PIX_FMT_NV12, + shader_vert_glsl, + nv12_shader_frag_glsl, + 2, + { + { 1, 1, 1, GL_R8, GL_RED }, + { 2, 2, 2, GL_RG8, GL_RG } + } + } +}; + static const float vert_pos[] = { 0.0f, 0.0f, 0.0f, 1.0f, @@ -93,6 +141,19 @@ AVOpenGLWidget::AVOpenGLWidget(VideoDecoder *decoder, QWidget *parent) : QOpenGLWidget(parent), decoder(decoder) { + conversion_config = nullptr; + for(auto &cc: conversion_configs) + { + if(decoder->PixelFormat() == cc.pixel_format) + { + conversion_config = &cc; + break; + } + } + + if(!conversion_config) + throw Exception("No matching video conversion config can be found"); + setFormat(CreateSurfaceFormat()); frame_uploader_context = nullptr; @@ -146,7 +207,7 @@ bool AVOpenGLFrame::Update(AVFrame *frame, ChiakiLog *log) { auto f = QOpenGLContext::currentContext()->extraFunctions(); - if(frame->format != AV_PIX_FMT_YUV420P) + if(frame->format != conversion_config->pixel_format) { CHIAKI_LOGE(log, "AVOpenGLFrame got AVFrame with invalid format"); return false; @@ -155,20 +216,16 @@ bool AVOpenGLFrame::Update(AVFrame *frame, ChiakiLog *log) width = frame->width; height = frame->height; - for(int i=0; i<3; i++) + for(int i=0; iplanes; i++) { - int width = frame->width; - int height = frame->height; - if(i > 0) - { - width /= 2; - height /= 2; - } + int width = frame->width / conversion_config->plane_configs[i].width_divider; + int height = frame->height / conversion_config->plane_configs[i].height_divider; + int size = width * height * conversion_config->plane_configs[i].data_per_pixel; f->glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo[i]); - f->glBufferData(GL_PIXEL_UNPACK_BUFFER, width * height, nullptr, GL_STREAM_DRAW); + f->glBufferData(GL_PIXEL_UNPACK_BUFFER, size, nullptr, GL_STREAM_DRAW); - auto buf = reinterpret_cast(f->glMapBufferRange(GL_PIXEL_UNPACK_BUFFER, 0, width * height, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT)); + auto buf = reinterpret_cast(f->glMapBufferRange(GL_PIXEL_UNPACK_BUFFER, 0, size, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT)); if(!buf) { CHIAKI_LOGE(log, "AVOpenGLFrame failed to map PBO"); @@ -176,17 +233,17 @@ bool AVOpenGLFrame::Update(AVFrame *frame, ChiakiLog *log) } if(frame->linesize[i] == width) - memcpy(buf, frame->data[i], width * height); + memcpy(buf, frame->data[i], size); else { for(int l=0; ldata[i] + frame->linesize[i] * l, width); + memcpy(buf + width * l * conversion_config->plane_configs[i].data_per_pixel, frame->data[i] + frame->linesize[i] * l, width * conversion_config->plane_configs[i].data_per_pixel); } f->glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER); f->glBindTexture(GL_TEXTURE_2D, tex[i]); - f->glTexImage2D(GL_TEXTURE_2D, 0, GL_R8, width, height, 0, GL_RED, GL_UNSIGNED_BYTE, nullptr); + f->glTexImage2D(GL_TEXTURE_2D, 0, conversion_config->plane_configs[i].internal_format, width, height, 0, conversion_config->plane_configs[i].format, GL_UNSIGNED_BYTE, nullptr); } f->glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); @@ -226,12 +283,12 @@ void AVOpenGLWidget::initializeGL() }; GLuint shader_vert = f->glCreateShader(GL_VERTEX_SHADER); - f->glShaderSource(shader_vert, 1, &shader_vert_glsl, nullptr); + f->glShaderSource(shader_vert, 1, &conversion_config->shader_vert_glsl, nullptr); f->glCompileShader(shader_vert); CheckShaderCompiled(shader_vert); GLuint shader_frag = f->glCreateShader(GL_FRAGMENT_SHADER); - f->glShaderSource(shader_frag, 1, &shader_frag_glsl, nullptr); + f->glShaderSource(shader_frag, 1, &conversion_config->shader_frag_glsl, nullptr); f->glCompileShader(shader_frag); CheckShaderCompiled(shader_frag); @@ -256,26 +313,31 @@ void AVOpenGLWidget::initializeGL() for(int i=0; i<2; i++) { - f->glGenTextures(3, frames[i].tex); - f->glGenBuffers(3, frames[i].pbo); - uint8_t uv_default = 127; - for(int j=0; j<3; j++) + frames[i].conversion_config = conversion_config; + f->glGenTextures(conversion_config->planes, frames[i].tex); + f->glGenBuffers(conversion_config->planes, frames[i].pbo); + uint8_t uv_default[] = {0x7f, 0x7f}; + for(int j=0; jplanes; j++) { f->glBindTexture(GL_TEXTURE_2D, frames[i].tex[j]); f->glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); f->glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); f->glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); f->glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - f->glTexImage2D(GL_TEXTURE_2D, 0, GL_R8, 1, 1, 0, GL_RED, GL_UNSIGNED_BYTE, j > 0 ? &uv_default : nullptr); + f->glTexImage2D(GL_TEXTURE_2D, 0, conversion_config->plane_configs[j].internal_format, 1, 1, 0, conversion_config->plane_configs[j].format, GL_UNSIGNED_BYTE, j > 0 ? uv_default : nullptr); } frames[i].width = 0; frames[i].height = 0; } f->glUseProgram(program); - f->glUniform1i(f->glGetUniformLocation(program, "tex_y"), 0); - f->glUniform1i(f->glGetUniformLocation(program, "tex_u"), 1); - f->glUniform1i(f->glGetUniformLocation(program, "tex_v"), 2); + + // bind only as many planes as we need + const char *plane_names[] = {"plane1", "plane2", "plane3"}; + for(int i=0; iglUniform1i(f->glGetUniformLocation(program, plane_names[i]), i); + } f->glGenVertexArrays(1, &vao); f->glBindVertexArray(vao); diff --git a/gui/src/settings.cpp b/gui/src/settings.cpp index 559c4e4..90a5c03 100644 --- a/gui/src/settings.cpp +++ b/gui/src/settings.cpp @@ -94,6 +94,25 @@ unsigned int Settings::GetAudioBufferSizeRaw() const return settings.value("settings/audio_buffer_size", 0).toUInt(); } +static const QMap hw_decode_engine_values = { + { HW_DECODE_NONE, "none" }, + { HW_DECODE_VAAPI, "vaapi" }, + { HW_DECODE_VDPAU, "vdpau" } +}; + +static const HardwareDecodeEngine hw_decode_engine_default = HW_DECODE_NONE; + +HardwareDecodeEngine Settings::GetHardwareDecodeEngine() const +{ + auto v = settings.value("settings/hw_decode_engine", hw_decode_engine_values[hw_decode_engine_default]).toString(); + return hw_decode_engine_values.key(v, hw_decode_engine_default); +} + +void Settings::SetHardwareDecodeEngine(HardwareDecodeEngine engine) +{ + settings.setValue("settings/hw_decode_engine", hw_decode_engine_values[engine]); +} + unsigned int Settings::GetAudioBufferSize() const { unsigned int v = GetAudioBufferSizeRaw(); diff --git a/gui/src/settingsdialog.cpp b/gui/src/settingsdialog.cpp index 30dc00a..3b0892a 100644 --- a/gui/src/settingsdialog.cpp +++ b/gui/src/settingsdialog.cpp @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -146,6 +147,29 @@ SettingsDialog::SettingsDialog(Settings *settings, QWidget *parent) : QDialog(pa audio_buffer_size_edit->setPlaceholderText(tr("Default (%1)").arg(settings->GetAudioBufferSizeDefault())); connect(audio_buffer_size_edit, &QLineEdit::textEdited, this, &SettingsDialog::AudioBufferSizeEdited); + // Decode Settings + + auto decode_settings = new QGroupBox(tr("Decode Settings")); + left_layout->addWidget(decode_settings); + + auto decode_settings_layout = new QFormLayout(); + decode_settings->setLayout(decode_settings_layout); + + hardware_decode_combo_box = new QComboBox(this); + static const QList> hardware_decode_engines = { + { HW_DECODE_NONE, "none"}, + { HW_DECODE_VAAPI, "vaapi"} + }; + auto current_hardware_decode_engine = settings->GetHardwareDecodeEngine(); + for(const auto &p : hardware_decode_engines) + { + hardware_decode_combo_box->addItem(p.second, (int)p.first); + if(current_hardware_decode_engine == p.first) + hardware_decode_combo_box->setCurrentIndex(hardware_decode_combo_box->count() - 1); + } + connect(hardware_decode_combo_box, SIGNAL(currentIndexChanged(int)), this, SLOT(HardwareDecodeEngineSelected())); + decode_settings_layout->addRow(tr("Hardware decode method:"), hardware_decode_combo_box); + // Registered Consoles auto registered_hosts_group_box = new QGroupBox(tr("Registered Consoles")); @@ -243,6 +267,11 @@ void SettingsDialog::AudioBufferSizeEdited() settings->SetAudioBufferSize(audio_buffer_size_edit->text().toUInt()); } +void SettingsDialog::HardwareDecodeEngineSelected() +{ + settings->SetHardwareDecodeEngine((HardwareDecodeEngine)hardware_decode_combo_box->currentData().toInt()); +} + void SettingsDialog::UpdateBitratePlaceholder() { bitrate_edit->setPlaceholderText(tr("Automatic (%1)").arg(settings->GetVideoProfile().bitrate)); diff --git a/gui/src/streamsession.cpp b/gui/src/streamsession.cpp index cccab98..bad6305 100644 --- a/gui/src/streamsession.cpp +++ b/gui/src/streamsession.cpp @@ -35,6 +35,7 @@ StreamSessionConnectInfo::StreamSessionConnectInfo(Settings *settings, QString host, QByteArray regist_key, QByteArray morning) { key_map = settings->GetControllerMappingForDecoding(); + hw_decode_engine = settings->GetHardwareDecodeEngine(); log_level_mask = settings->GetLogLevelMask(); log_file = CreateLogFilename(); video_profile = settings->GetVideoProfile(); @@ -56,7 +57,7 @@ StreamSession::StreamSession(const StreamSessionConnectInfo &connect_info, QObje gamepad(nullptr), #endif controller(nullptr), - video_decoder(log.GetChiakiLog()), + video_decoder(connect_info.hw_decode_engine, log.GetChiakiLog()), audio_output(nullptr), audio_io(nullptr) { diff --git a/gui/src/videodecoder.cpp b/gui/src/videodecoder.cpp index 4937142..ba59fe0 100644 --- a/gui/src/videodecoder.cpp +++ b/gui/src/videodecoder.cpp @@ -21,10 +21,13 @@ #include -VideoDecoder::VideoDecoder(ChiakiLog *log) : log(log) +VideoDecoder::VideoDecoder(HardwareDecodeEngine hw_decode_engine, ChiakiLog *log) : hw_decode_engine(hw_decode_engine), log(log) { + enum AVHWDeviceType type; + hw_device_ctx = nullptr; + #if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(58, 10, 100) - avcodec_register_all(); + avcodec_register_all(); #endif codec = avcodec_find_decoder(AV_CODEC_ID_H264); if(!codec) @@ -34,6 +37,34 @@ VideoDecoder::VideoDecoder(ChiakiLog *log) : log(log) if(!codec_context) throw VideoDecoderException("Failed to alloc codec context"); + if(hw_decode_engine) + { + if(!hardware_decode_engine_names.contains(hw_decode_engine)) + throw VideoDecoderException("Unknown hardware decode engine!"); + + const char *hw_dec_eng = hardware_decode_engine_names[hw_decode_engine]; + CHIAKI_LOGI(log, "Using hardware decode %s", hw_dec_eng); + type = av_hwdevice_find_type_by_name(hw_dec_eng); + if (type == AV_HWDEVICE_TYPE_NONE) + throw VideoDecoderException("Can't initialize vaapi"); + + for(int i = 0;; i++) { + const AVCodecHWConfig *config = avcodec_get_hw_config(codec, i); + if(!config) + throw VideoDecoderException("avcodec_get_hw_config failed"); + if(config->methods & AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX && + config->device_type == type) + { + hw_pix_fmt = config->pix_fmt; + break; + } + } + + if(av_hwdevice_ctx_create(&hw_device_ctx, type, NULL, NULL, 0) < 0) + throw VideoDecoderException("Failed to create hwdevice context"); + codec_context->hw_device_ctx = av_buffer_ref(hw_device_ctx); + } + if(avcodec_open2(codec_context, codec, nullptr) < 0) { avcodec_free_context(&codec_context); @@ -45,6 +76,10 @@ VideoDecoder::~VideoDecoder() { avcodec_close(codec_context); avcodec_free_context(&codec_context); + if(hw_device_ctx) + { + av_buffer_unref(&hw_device_ctx); + } } void VideoDecoder::PushFrame(uint8_t *buf, size_t buf_size) @@ -98,7 +133,8 @@ AVFrame *VideoDecoder::PullFrame() // always try to pull as much as possible and return only the very last frame AVFrame *frame_last = nullptr; - AVFrame *frame = nullptr; + AVFrame *sw_frame = nullptr; + AVFrame *frame = nullptr; while(true) { AVFrame *next_frame; @@ -116,7 +152,11 @@ AVFrame *VideoDecoder::PullFrame() frame_last = frame; frame = next_frame; int r = avcodec_receive_frame(codec_context, frame); - if(r != 0) + if(r == 0) + { + frame = hw_decode_engine ? GetFromHardware(frame) : frame; + } + else { if(r != AVERROR(EAGAIN)) CHIAKI_LOGE(log, "Decoding with FFMPEG failed"); @@ -125,3 +165,28 @@ AVFrame *VideoDecoder::PullFrame() } } } + +AVFrame *VideoDecoder::GetFromHardware(AVFrame *hw_frame) +{ + AVFrame *frame; + AVFrame *sw_frame; + + sw_frame = av_frame_alloc(); + + int ret = av_hwframe_transfer_data(sw_frame, hw_frame, 0); + + if(ret < 0) + { + CHIAKI_LOGE(log, "Failed to transfer frame from hardware"); + } + + av_frame_unref(hw_frame); + + if(sw_frame->width <= 0) + { + av_frame_unref(sw_frame); + return nullptr; + } + + return sw_frame; +} diff --git a/scripts/build-ffmpeg.sh b/scripts/build-ffmpeg.sh index db73be4..af44a22 100755 --- a/scripts/build-ffmpeg.sh +++ b/scripts/build-ffmpeg.sh @@ -9,4 +9,4 @@ git clone https://git.ffmpeg.org/ffmpeg.git --depth 1 -b $TAG && cd ffmpeg || ex ./configure --disable-all --enable-avcodec --enable-decoder=h264 --prefix="$ROOT/ffmpeg-prefix" "$@" || exit 1 make -j4 || exit 1 -make install || exit 1 \ No newline at end of file +make install || exit 1