Add VAAPI support to Qt GUI (Fix #26) (#205)

This commit is contained in:
Łukasz Siudut 2020-04-22 09:57:18 +01:00 committed by GitHub
commit 8c651b0890
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 269 additions and 40 deletions

View file

@ -28,15 +28,36 @@ extern "C"
#include <libavcodec/avcodec.h>
}
#define MAX_PANES 3
class VideoDecoder;
class AVOpenGLFrameUploader;
/**
 * Layout of a single image plane as it is uploaded to an OpenGL texture.
 *
 * Member order matters: instances are brace-initialised positionally.
 */
struct PlaneConfig
{
// the frame width is divided by this to get the plane width
unsigned int width_divider;
// the frame height is divided by this to get the plane height
unsigned int height_divider;
// multiplied with plane width * height to get the upload buffer size
unsigned int data_per_pixel;
// internal format argument handed to glTexImage2D for this plane
GLint internal_format;
// pixel data format argument handed to glTexImage2D for this plane
GLenum format;
};
/**
 * Everything needed to render frames of one pixel format:
 * the shader sources and the layout of each plane.
 *
 * Member order matters: instances are brace-initialised positionally.
 */
struct ConversionConfig
{
// pixel format this config applies to; compared against the decoder's and the frame's format
enum AVPixelFormat pixel_format;
// vertex shader source passed to glShaderSource
const char *shader_vert_glsl;
// fragment shader source passed to glShaderSource
const char *shader_frag_glsl;
// number of entries of plane_configs that are in use
unsigned int planes;
// per-plane layout, at most MAX_PANES entries
struct PlaneConfig plane_configs[MAX_PANES];
};
/**
 * OpenGL objects and dimensions belonging to one uploaded video frame.
 */
struct AVOpenGLFrame
{
// NOTE(review): pbo and tex are each declared twice below (size 3 and size MAX_PANES).
// This looks like the removed and the added lines of a diff shown together -- confirm
// that only the MAX_PANES pair is meant to remain.
GLuint pbo[3];
GLuint tex[3];
// one pixel buffer object per plane
GLuint pbo[MAX_PANES];
// one texture per plane
GLuint tex[MAX_PANES];
// dimensions of the frame most recently passed to Update
unsigned int width;
unsigned int height;
// describes the expected pixel format and plane layout; not allocated by this struct
ConversionConfig *conversion_config;
/**
 * Upload the planes of the given frame.
 * @return false if the frame could not be uploaded
 */
bool Update(AVFrame *frame, ChiakiLog *log);
};
@ -61,6 +82,8 @@ class AVOpenGLWidget: public QOpenGLWidget
QTimer *mouse_timer;
ConversionConfig *conversion_config;
public:
static QSurfaceFormat CreateSurfaceFormat();

View file

@ -21,6 +21,7 @@
#include <chiaki/session.h>
#include "host.h"
#include "videodecoder.h"
#include <QSettings>
@ -76,6 +77,9 @@ class Settings : public QObject
unsigned int GetBitrate() const;
void SetBitrate(unsigned int bitrate);
HardwareDecodeEngine GetHardwareDecodeEngine() const;
void SetHardwareDecodeEngine(HardwareDecodeEngine enabled);
unsigned int GetAudioBufferSizeDefault() const;
/**

View file

@ -39,6 +39,7 @@ class SettingsDialog : public QDialog
QComboBox *fps_combo_box;
QLineEdit *bitrate_edit;
QLineEdit *audio_buffer_size_edit;
QComboBox *hardware_decode_combo_box;
QListWidget *registered_hosts_list_widget;
QPushButton *delete_registered_host_button;
@ -52,6 +53,7 @@ class SettingsDialog : public QDialog
void FPSSelected();
void BitrateEdited();
void AudioBufferSizeEdited();
void HardwareDecodeEngineSelected();
void UpdateRegisteredHosts();
void UpdateRegisteredHostsButtons();

View file

@ -49,6 +49,7 @@ class ChiakiException: public Exception
struct StreamSessionConnectInfo
{
QMap<Qt::Key, int> key_map;
HardwareDecodeEngine hw_decode_engine;
uint32_t log_level_mask;
QString log_file;
QString host;

View file

@ -22,6 +22,7 @@
#include "exception.h"
#include <QMap>
#include <QMutex>
#include <QObject>
@ -32,6 +33,20 @@ extern "C"
#include <cstdint>
/**
 * Selects which hardware decoding backend, if any, the video decoder uses.
 *
 * Values of this type are tested for truthiness to decide whether hardware
 * decoding is active, so HW_DECODE_NONE must stay 0.
 */
typedef enum {
HW_DECODE_NONE = 0,
HW_DECODE_VAAPI = 1,
HW_DECODE_VDPAU = 2,
} HardwareDecodeEngine;
/**
 * Name of each engine as it is passed to av_hwdevice_find_type_by_name()
 * and printed in the log.
 */
static const QMap<HardwareDecodeEngine, const char *> hardware_decode_engine_names = {
{ HW_DECODE_NONE, "none"},
{ HW_DECODE_VAAPI, "vaapi"},
{ HW_DECODE_VDPAU, "vdpau"},
};
class VideoDecoderException: public Exception
{
public:
@ -43,23 +58,31 @@ class VideoDecoder: public QObject
Q_OBJECT
public:
VideoDecoder(ChiakiLog *log);
VideoDecoder(HardwareDecodeEngine hw_decode_engine, ChiakiLog *log);
~VideoDecoder();
void PushFrame(uint8_t *buf, size_t buf_size);
AVFrame *PullFrame();
AVFrame *GetFromHardware(AVFrame *hw_frame);
ChiakiLog *GetChiakiLog() { return log; }
enum AVPixelFormat PixelFormat() { return hw_decode_engine?AV_PIX_FMT_NV12:AV_PIX_FMT_YUV420P; }
signals:
void FramesAvailable();
private:
HardwareDecodeEngine hw_decode_engine;
ChiakiLog *log;
QMutex mutex;
AVCodec *codec;
AVCodecContext *codec_context;
enum AVPixelFormat hw_pix_fmt;
AVBufferRef *hw_device_ctx;
};
#endif // CHIAKI_VIDEODECODER_H

View file

@ -43,12 +43,35 @@ void main()
}
)glsl";
static const char *shader_frag_glsl = R"glsl(
static const char *yuv420p_shader_frag_glsl = R"glsl(
#version 150 core
uniform sampler2D tex_y;
uniform sampler2D tex_u;
uniform sampler2D tex_v;
uniform sampler2D plane1; // Y
uniform sampler2D plane2; // U
uniform sampler2D plane3; // V
in vec2 uv_var;
out vec4 out_color;
void main()
{
vec3 yuv = vec3(
(texture(plane1, uv_var).r - (16.0 / 255.0)) / ((235.0 - 16.0) / 255.0),
(texture(plane2, uv_var).r - (16.0 / 255.0)) / ((240.0 - 16.0) / 255.0) - 0.5,
(texture(plane3, uv_var).r - (16.0 / 255.0)) / ((240.0 - 16.0) / 255.0) - 0.5);
vec3 rgb = mat3(
1.0, 1.0, 1.0,
0.0, -0.21482, 2.12798,
1.28033, -0.38059, 0.0) * yuv;
out_color = vec4(rgb, 1.0);
}
)glsl";
static const char *nv12_shader_frag_glsl = R"glsl(
#version 150 core
uniform sampler2D plane1; // Y
uniform sampler2D plane2; // interlaced UV
in vec2 uv_var;
@ -57,9 +80,10 @@ out vec4 out_color;
void main()
{
vec3 yuv = vec3(
(texture(tex_y, uv_var).r - (16.0 / 255.0)) / ((235.0 - 16.0) / 255.0),
(texture(tex_u, uv_var).r - (16.0 / 255.0)) / ((240.0 - 16.0) / 255.0) - 0.5,
(texture(tex_v, uv_var).r - (16.0 / 255.0)) / ((240.0 - 16.0) / 255.0) - 0.5);
(texture(plane1, uv_var).r - (16.0 / 255.0)) / ((235.0 - 16.0) / 255.0),
(texture(plane2, uv_var).r - (16.0 / 255.0)) / ((240.0 - 16.0) / 255.0) - 0.5,
(texture(plane2, uv_var).g - (16.0 / 255.0)) / ((240.0 - 16.0) / 255.0) - 0.5
);
vec3 rgb = mat3(
1.0, 1.0, 1.0,
0.0, -0.21482, 2.12798,
@ -68,6 +92,30 @@ void main()
}
)glsl";
/**
 * All pixel formats the widget can render. The first entry whose pixel format
 * matches the decoder's is picked.
 *
 * Each plane entry reads:
 * { width_divider, height_divider, data_per_pixel, internal_format, format }
 */
ConversionConfig conversion_configs[] = {
{
// planar: three separate single-channel planes, the 2nd and 3rd at half resolution
AV_PIX_FMT_YUV420P,
shader_vert_glsl,
yuv420p_shader_frag_glsl,
3,
{
{ 1, 1, 1, GL_R8, GL_RED },
{ 2, 2, 1, GL_R8, GL_RED },
{ 2, 2, 1, GL_R8, GL_RED }
}
},
{
// two planes: a full resolution single-channel plane and a half resolution
// two-channel plane; the unused third slot is left zero-initialised
AV_PIX_FMT_NV12,
shader_vert_glsl,
nv12_shader_frag_glsl,
2,
{
{ 1, 1, 1, GL_R8, GL_RED },
{ 2, 2, 2, GL_RG8, GL_RG }
}
}
};
static const float vert_pos[] = {
0.0f, 0.0f,
0.0f, 1.0f,
@ -93,6 +141,19 @@ AVOpenGLWidget::AVOpenGLWidget(VideoDecoder *decoder, QWidget *parent)
: QOpenGLWidget(parent),
decoder(decoder)
{
conversion_config = nullptr;
for(auto &cc: conversion_configs)
{
if(decoder->PixelFormat() == cc.pixel_format)
{
conversion_config = &cc;
break;
}
}
if(!conversion_config)
throw Exception("No matching video conversion config can be found");
setFormat(CreateSurfaceFormat());
frame_uploader_context = nullptr;
@ -146,7 +207,7 @@ bool AVOpenGLFrame::Update(AVFrame *frame, ChiakiLog *log)
{
auto f = QOpenGLContext::currentContext()->extraFunctions();
if(frame->format != AV_PIX_FMT_YUV420P)
if(frame->format != conversion_config->pixel_format)
{
CHIAKI_LOGE(log, "AVOpenGLFrame got AVFrame with invalid format");
return false;
@ -155,20 +216,16 @@ bool AVOpenGLFrame::Update(AVFrame *frame, ChiakiLog *log)
width = frame->width;
height = frame->height;
for(int i=0; i<3; i++)
for(int i=0; i<conversion_config->planes; i++)
{
int width = frame->width;
int height = frame->height;
if(i > 0)
{
width /= 2;
height /= 2;
}
int width = frame->width / conversion_config->plane_configs[i].width_divider;
int height = frame->height / conversion_config->plane_configs[i].height_divider;
int size = width * height * conversion_config->plane_configs[i].data_per_pixel;
f->glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo[i]);
f->glBufferData(GL_PIXEL_UNPACK_BUFFER, width * height, nullptr, GL_STREAM_DRAW);
f->glBufferData(GL_PIXEL_UNPACK_BUFFER, size, nullptr, GL_STREAM_DRAW);
auto buf = reinterpret_cast<uint8_t *>(f->glMapBufferRange(GL_PIXEL_UNPACK_BUFFER, 0, width * height, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT));
auto buf = reinterpret_cast<uint8_t *>(f->glMapBufferRange(GL_PIXEL_UNPACK_BUFFER, 0, size, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT));
if(!buf)
{
CHIAKI_LOGE(log, "AVOpenGLFrame failed to map PBO");
@ -176,17 +233,17 @@ bool AVOpenGLFrame::Update(AVFrame *frame, ChiakiLog *log)
}
if(frame->linesize[i] == width)
memcpy(buf, frame->data[i], width * height);
memcpy(buf, frame->data[i], size);
else
{
for(int l=0; l<height; l++)
memcpy(buf + width * l, frame->data[i] + frame->linesize[i] * l, width);
memcpy(buf + width * l * conversion_config->plane_configs[i].data_per_pixel, frame->data[i] + frame->linesize[i] * l, width * conversion_config->plane_configs[i].data_per_pixel);
}
f->glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER);
f->glBindTexture(GL_TEXTURE_2D, tex[i]);
f->glTexImage2D(GL_TEXTURE_2D, 0, GL_R8, width, height, 0, GL_RED, GL_UNSIGNED_BYTE, nullptr);
f->glTexImage2D(GL_TEXTURE_2D, 0, conversion_config->plane_configs[i].internal_format, width, height, 0, conversion_config->plane_configs[i].format, GL_UNSIGNED_BYTE, nullptr);
}
f->glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
@ -226,12 +283,12 @@ void AVOpenGLWidget::initializeGL()
};
GLuint shader_vert = f->glCreateShader(GL_VERTEX_SHADER);
f->glShaderSource(shader_vert, 1, &shader_vert_glsl, nullptr);
f->glShaderSource(shader_vert, 1, &conversion_config->shader_vert_glsl, nullptr);
f->glCompileShader(shader_vert);
CheckShaderCompiled(shader_vert);
GLuint shader_frag = f->glCreateShader(GL_FRAGMENT_SHADER);
f->glShaderSource(shader_frag, 1, &shader_frag_glsl, nullptr);
f->glShaderSource(shader_frag, 1, &conversion_config->shader_frag_glsl, nullptr);
f->glCompileShader(shader_frag);
CheckShaderCompiled(shader_frag);
@ -256,26 +313,31 @@ void AVOpenGLWidget::initializeGL()
for(int i=0; i<2; i++)
{
f->glGenTextures(3, frames[i].tex);
f->glGenBuffers(3, frames[i].pbo);
uint8_t uv_default = 127;
for(int j=0; j<3; j++)
frames[i].conversion_config = conversion_config;
f->glGenTextures(conversion_config->planes, frames[i].tex);
f->glGenBuffers(conversion_config->planes, frames[i].pbo);
uint8_t uv_default[] = {0x7f, 0x7f};
for(int j=0; j<conversion_config->planes; j++)
{
f->glBindTexture(GL_TEXTURE_2D, frames[i].tex[j]);
f->glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
f->glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
f->glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
f->glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
f->glTexImage2D(GL_TEXTURE_2D, 0, GL_R8, 1, 1, 0, GL_RED, GL_UNSIGNED_BYTE, j > 0 ? &uv_default : nullptr);
f->glTexImage2D(GL_TEXTURE_2D, 0, conversion_config->plane_configs[j].internal_format, 1, 1, 0, conversion_config->plane_configs[j].format, GL_UNSIGNED_BYTE, j > 0 ? uv_default : nullptr);
}
frames[i].width = 0;
frames[i].height = 0;
}
f->glUseProgram(program);
f->glUniform1i(f->glGetUniformLocation(program, "tex_y"), 0);
f->glUniform1i(f->glGetUniformLocation(program, "tex_u"), 1);
f->glUniform1i(f->glGetUniformLocation(program, "tex_v"), 2);
// bind only as many planes as we need
const char *plane_names[] = {"plane1", "plane2", "plane3"};
for(int i=0; i<sizeof(plane_names)/sizeof(char *); i++)
{
f->glUniform1i(f->glGetUniformLocation(program, plane_names[i]), i);
}
f->glGenVertexArrays(1, &vao);
f->glBindVertexArray(vao);

View file

@ -94,6 +94,25 @@ unsigned int Settings::GetAudioBufferSizeRaw() const
return settings.value("settings/audio_buffer_size", 0).toUInt();
}
/**
 * String stored under "settings/hw_decode_engine" for each engine.
 */
static const QMap<HardwareDecodeEngine, QString> hw_decode_engine_values = {
{ HW_DECODE_NONE, "none" },
{ HW_DECODE_VAAPI, "vaapi" },
{ HW_DECODE_VDPAU, "vdpau" }
};
// engine used when nothing is stored or the stored string is not recognised
static const HardwareDecodeEngine hw_decode_engine_default = HW_DECODE_NONE;
/**
 * Read the configured hardware decode engine.
 *
 * @return the engine whose name is stored under "settings/hw_decode_engine",
 *         or the default engine if nothing is stored or the name is unknown.
 */
HardwareDecodeEngine Settings::GetHardwareDecodeEngine() const
{
	// name to assume when the key has never been written
	const QString fallback_name = hw_decode_engine_values.value(hw_decode_engine_default);
	const QString stored_name = settings.value("settings/hw_decode_engine", fallback_name).toString();

	// reverse lookup: name -> engine, falling back to the default for unknown names
	return hw_decode_engine_values.key(stored_name, hw_decode_engine_default);
}
/**
 * Persist the hardware decode engine under "settings/hw_decode_engine".
 *
 * @param engine engine to store; it is written as its name from hw_decode_engine_values
 */
void Settings::SetHardwareDecodeEngine(HardwareDecodeEngine engine)
{
	// an engine without a table entry yields a default-constructed (empty) string
	const QString engine_name = hw_decode_engine_values.value(engine);
	settings.setValue("settings/hw_decode_engine", engine_name);
}
unsigned int Settings::GetAudioBufferSize() const
{
unsigned int v = GetAudioBufferSizeRaw();

View file

@ -20,6 +20,7 @@
#include <settingskeycapturedialog.h>
#include <registdialog.h>
#include <sessionlog.h>
#include <videodecoder.h>
#include <QHBoxLayout>
#include <QVBoxLayout>
@ -146,6 +147,29 @@ SettingsDialog::SettingsDialog(Settings *settings, QWidget *parent) : QDialog(pa
audio_buffer_size_edit->setPlaceholderText(tr("Default (%1)").arg(settings->GetAudioBufferSizeDefault()));
connect(audio_buffer_size_edit, &QLineEdit::textEdited, this, &SettingsDialog::AudioBufferSizeEdited);
// Decode Settings
auto decode_settings = new QGroupBox(tr("Decode Settings"));
left_layout->addWidget(decode_settings);
auto decode_settings_layout = new QFormLayout();
decode_settings->setLayout(decode_settings_layout);
hardware_decode_combo_box = new QComboBox(this);
static const QList<QPair<HardwareDecodeEngine, const char *>> hardware_decode_engines = {
{ HW_DECODE_NONE, "none"},
{ HW_DECODE_VAAPI, "vaapi"}
};
auto current_hardware_decode_engine = settings->GetHardwareDecodeEngine();
for(const auto &p : hardware_decode_engines)
{
hardware_decode_combo_box->addItem(p.second, (int)p.first);
if(current_hardware_decode_engine == p.first)
hardware_decode_combo_box->setCurrentIndex(hardware_decode_combo_box->count() - 1);
}
connect(hardware_decode_combo_box, SIGNAL(currentIndexChanged(int)), this, SLOT(HardwareDecodeEngineSelected()));
decode_settings_layout->addRow(tr("Hardware decode method:"), hardware_decode_combo_box);
// Registered Consoles
auto registered_hosts_group_box = new QGroupBox(tr("Registered Consoles"));
@ -243,6 +267,11 @@ void SettingsDialog::AudioBufferSizeEdited()
settings->SetAudioBufferSize(audio_buffer_size_edit->text().toUInt());
}
void SettingsDialog::HardwareDecodeEngineSelected()
{
settings->SetHardwareDecodeEngine((HardwareDecodeEngine)hardware_decode_combo_box->currentData().toInt());
}
void SettingsDialog::UpdateBitratePlaceholder()
{
bitrate_edit->setPlaceholderText(tr("Automatic (%1)").arg(settings->GetVideoProfile().bitrate));

View file

@ -35,6 +35,7 @@
StreamSessionConnectInfo::StreamSessionConnectInfo(Settings *settings, QString host, QByteArray regist_key, QByteArray morning)
{
key_map = settings->GetControllerMappingForDecoding();
hw_decode_engine = settings->GetHardwareDecodeEngine();
log_level_mask = settings->GetLogLevelMask();
log_file = CreateLogFilename();
video_profile = settings->GetVideoProfile();
@ -56,7 +57,7 @@ StreamSession::StreamSession(const StreamSessionConnectInfo &connect_info, QObje
gamepad(nullptr),
#endif
controller(nullptr),
video_decoder(log.GetChiakiLog()),
video_decoder(connect_info.hw_decode_engine, log.GetChiakiLog()),
audio_output(nullptr),
audio_io(nullptr)
{

View file

@ -21,10 +21,13 @@
#include <QImage>
VideoDecoder::VideoDecoder(ChiakiLog *log) : log(log)
VideoDecoder::VideoDecoder(HardwareDecodeEngine hw_decode_engine, ChiakiLog *log) : hw_decode_engine(hw_decode_engine), log(log)
{
enum AVHWDeviceType type;
hw_device_ctx = nullptr;
#if LIBAVCODEC_VERSION_INT < AV_VERSION_INT(58, 10, 100)
avcodec_register_all();
avcodec_register_all();
#endif
codec = avcodec_find_decoder(AV_CODEC_ID_H264);
if(!codec)
@ -34,6 +37,34 @@ VideoDecoder::VideoDecoder(ChiakiLog *log) : log(log)
if(!codec_context)
throw VideoDecoderException("Failed to alloc codec context");
if(hw_decode_engine)
{
if(!hardware_decode_engine_names.contains(hw_decode_engine))
throw VideoDecoderException("Unknown hardware decode engine!");
const char *hw_dec_eng = hardware_decode_engine_names[hw_decode_engine];
CHIAKI_LOGI(log, "Using hardware decode %s", hw_dec_eng);
type = av_hwdevice_find_type_by_name(hw_dec_eng);
if (type == AV_HWDEVICE_TYPE_NONE)
throw VideoDecoderException("Can't initialize vaapi");
for(int i = 0;; i++) {
const AVCodecHWConfig *config = avcodec_get_hw_config(codec, i);
if(!config)
throw VideoDecoderException("avcodec_get_hw_config failed");
if(config->methods & AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX &&
config->device_type == type)
{
hw_pix_fmt = config->pix_fmt;
break;
}
}
if(av_hwdevice_ctx_create(&hw_device_ctx, type, NULL, NULL, 0) < 0)
throw VideoDecoderException("Failed to create hwdevice context");
codec_context->hw_device_ctx = av_buffer_ref(hw_device_ctx);
}
if(avcodec_open2(codec_context, codec, nullptr) < 0)
{
avcodec_free_context(&codec_context);
@ -45,6 +76,10 @@ VideoDecoder::~VideoDecoder()
{
avcodec_close(codec_context);
avcodec_free_context(&codec_context);
if(hw_device_ctx)
{
av_buffer_unref(&hw_device_ctx);
}
}
void VideoDecoder::PushFrame(uint8_t *buf, size_t buf_size)
@ -98,7 +133,8 @@ AVFrame *VideoDecoder::PullFrame()
// always try to pull as much as possible and return only the very last frame
AVFrame *frame_last = nullptr;
AVFrame *frame = nullptr;
AVFrame *sw_frame = nullptr;
AVFrame *frame = nullptr;
while(true)
{
AVFrame *next_frame;
@ -116,7 +152,11 @@ AVFrame *VideoDecoder::PullFrame()
frame_last = frame;
frame = next_frame;
int r = avcodec_receive_frame(codec_context, frame);
if(r != 0)
if(r == 0)
{
frame = hw_decode_engine ? GetFromHardware(frame) : frame;
}
else
{
if(r != AVERROR(EAGAIN))
CHIAKI_LOGE(log, "Decoding with FFMPEG failed");
@ -125,3 +165,28 @@ AVFrame *VideoDecoder::PullFrame()
}
}
}
/**
 * Copy a decoded frame out of hardware memory into a newly allocated frame.
 *
 * The contents of hw_frame are unreferenced before returning, whether or not
 * the transfer succeeded.
 *
 * @param hw_frame frame as received from the hardware decoder
 * @return a newly allocated frame holding the transferred data, or nullptr if
 *         the allocation or the transfer failed or the result has no valid width.
 *         A non-null result is not freed here; presumably the caller takes
 *         ownership -- confirm against PullFrame.
 */
AVFrame *VideoDecoder::GetFromHardware(AVFrame *hw_frame)
{
	AVFrame *sw_frame = av_frame_alloc();
	if(!sw_frame)
	{
		// without a destination frame the transfer below would dereference null
		CHIAKI_LOGE(log, "Failed to allocate frame for transfer from hardware");
		av_frame_unref(hw_frame);
		return nullptr;
	}

	int ret = av_hwframe_transfer_data(sw_frame, hw_frame, 0);
	if(ret < 0)
	{
		CHIAKI_LOGE(log, "Failed to transfer frame from hardware");
	}

	// NOTE(review): this releases only the contents of hw_frame, not the AVFrame
	// struct itself. PullFrame replaces its pointer with our return value, so
	// confirm the struct is freed somewhere.
	av_frame_unref(hw_frame);

	if(ret < 0 || sw_frame->width <= 0)
	{
		// free the struct as well as its contents; a plain unref would leak it
		av_frame_free(&sw_frame);
		return nullptr;
	}

	return sw_frame;
}