Add GLFFT
This commit is contained in:
19
glfft/LICENSE_ORIGINAL
Normal file
19
glfft/LICENSE_ORIGINAL
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
Copyright (c) 2015 Hans-Kristian Arntzen <maister@archlinux.us>
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
in the Software without restriction, including without limitation the rights
|
||||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in
|
||||||
|
all copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
THE SOFTWARE.
|
||||||
1125
glfft/glfft.cpp
Normal file
1125
glfft/glfft.cpp
Normal file
File diff suppressed because it is too large
Load Diff
225
glfft/glfft.hpp
Normal file
225
glfft/glfft.hpp
Normal file
@@ -0,0 +1,225 @@
|
|||||||
|
/* Copyright (C) 2015 Hans-Kristian Arntzen <maister@archlinux.us>
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge,
|
||||||
|
* to any person obtaining a copy of this software and associated documentation files (the "Software"),
|
||||||
|
* to deal in the Software without restriction, including without limitation the rights to
|
||||||
|
* use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
|
||||||
|
* and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
|
||||||
|
* INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||||
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef GLFFT_HPP__
|
||||||
|
#define GLFFT_HPP__
|
||||||
|
|
||||||
|
#include "glfft_interface.hpp"
|
||||||
|
#include "glfft_common.hpp"
|
||||||
|
#include "glfft_wisdom.hpp"
|
||||||
|
#include <vector>
|
||||||
|
#include <unordered_map>
|
||||||
|
#include <limits>
|
||||||
|
|
||||||
|
/// GLFFT doesn't try to preserve GL state in any way.
|
||||||
|
/// E.g. SHADER_STORAGE_BUFFER bindings, programs bound, texture bindings, etc.
|
||||||
|
/// Applications calling this library must expect that some GL state will be modified.
|
||||||
|
/// No rendering state associated with graphics will be modified.
|
||||||
|
|
||||||
|
namespace GLFFT
|
||||||
|
{
|
||||||
|
|
||||||
|
class FFT
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
/// @brief Creates a full FFT.
|
||||||
|
///
|
||||||
|
/// All buffer allocation done by GLFFT will be done in constructor.
|
||||||
|
/// Will throw if invalid parameters are passed.
|
||||||
|
///
|
||||||
|
/// @param context The graphics context.
|
||||||
|
/// @param Nx Number of samples in horizontal dimension.
|
||||||
|
/// @param Ny Number of samples in vertical dimension.
|
||||||
|
/// @param type The transform type.
|
||||||
|
/// @param direction Forward, inverse or inverse with convolution.
|
||||||
|
/// For real-to-complex and complex-to-real transforms, the
|
||||||
|
/// transform type must match.
|
||||||
|
/// @param input_target GL object type of input target. For real-to-complex with texture as input, ImageReal is used.
|
||||||
|
/// @param output_target GL object type of output target. For complex-to-real with texture as output, ImageReal is used.
|
||||||
|
/// @param cache A program cache for caching the GLFFT programs created.
|
||||||
|
/// @param options FFT options such as performance related parameters and types.
|
||||||
|
/// @param wisdom GLFFT wisdom which can override performance related options
|
||||||
|
/// (options.performance is used as a fallback).
|
||||||
|
FFT(Context *context, unsigned Nx, unsigned Ny,
|
||||||
|
Type type, Direction direction, Target input_target, Target output_target,
|
||||||
|
std::shared_ptr<ProgramCache> cache, const FFTOptions &options,
|
||||||
|
const FFTWisdom &wisdom = FFTWisdom());
|
||||||
|
|
||||||
|
/// @brief Creates a single stage FFT. Used mostly internally for benchmarking partial FFTs.
|
||||||
|
///
|
||||||
|
/// All buffer allocation done by GLFFT will be done in constructor.
|
||||||
|
/// Will throw if invalid parameters are passed.
|
||||||
|
///
|
||||||
|
/// @param context The graphics context.
|
||||||
|
/// @param Nx Number of samples in horizontal dimension.
|
||||||
|
/// @param Ny Number of samples in vertical dimension.
|
||||||
|
/// @param radix FFT radix to test.
|
||||||
|
/// @param p Accumulated p factor. If 1, "first pass" mode is tested, otherwise, generic FFT stages.
|
||||||
|
/// @param mode The transform mode.
|
||||||
|
/// @param input_target GL object type of input target. For real-to-complex with texture as input, ImageReal is used.
|
||||||
|
/// @param output_target GL object type of output target. For complex-to-real with texture as output, ImageReal is used.
|
||||||
|
/// @param cache A program cache for caching the GLFFT programs created.
|
||||||
|
/// @param options FFT options such as performance related parameters and types.
|
||||||
|
FFT(Context *context, unsigned Nx, unsigned Ny, unsigned radix, unsigned p,
|
||||||
|
Mode mode, Target input_target, Target output_target,
|
||||||
|
std::shared_ptr<ProgramCache> cache, const FFTOptions &options);
|
||||||
|
|
||||||
|
/// @brief Process the FFT.
|
||||||
|
///
|
||||||
|
/// The type of object passed here must match what FFT was initialized with.
|
||||||
|
///
|
||||||
|
/// @param cmd Command buffer for issuing dispatch commands.
|
||||||
|
/// @param output Output buffer or image.
|
||||||
|
/// NOTE: For images, the texture must be using immutable storage, i.e. glTexStorage2D!
|
||||||
|
/// @param input Input buffer or texture.
|
||||||
|
/// @param input_aux If using convolution transform type,
|
||||||
|
/// the content of input and input_aux will be multiplied together.
|
||||||
|
void process(CommandBuffer *cmd, Resource *output, Resource *input, Resource *input_aux = nullptr);
|
||||||
|
|
||||||
|
/// @brief Run process() multiple times, timing the results.
|
||||||
|
///
|
||||||
|
/// Mostly used internally by GLFFT wisdom, glfft_cli's bench, and so on.
|
||||||
|
///
|
||||||
|
/// @param context The graphics context.
|
||||||
|
/// @param output Output buffer or image.
|
||||||
|
/// NOTE: For images, the texture must be using immutable storage, i.e. glTexStorage2D!
|
||||||
|
/// @param input Input buffer or texture.
|
||||||
|
/// @param warmup_iterations Number of iterations to run to "warm" up GL, ensures we don't hit
|
||||||
|
/// recompilations or similar when benching.
|
||||||
|
/// @param iterations Number of iterations to run the benchmark.
|
||||||
|
/// Each iteration will ensure timing with a glFinish() followed by timing.
|
||||||
|
/// @param dispatches_per_iteration Number of calls to process() we should do per iteration.
|
||||||
|
/// @param max_time The max time the benchmark should run. Will be checked after each iteration is complete.
|
||||||
|
///
|
||||||
|
/// @returns Average GPU time per process() call.
|
||||||
|
double bench(Context *context, Resource *output, Resource *input,
|
||||||
|
unsigned warmup_iterations, unsigned iterations, unsigned dispatches_per_iteration,
|
||||||
|
double max_time = std::numeric_limits<double>::max());
|
||||||
|
|
||||||
|
/// @brief Returns cost for a process() call. Only used for debugging.
|
||||||
|
double get_cost() const { return cost; }
|
||||||
|
|
||||||
|
/// @brief Returns number of passes (glDispatchCompute) in a process() call.
|
||||||
|
unsigned get_num_passes() const { return passes.size(); }
|
||||||
|
|
||||||
|
/// @brief Returns Nx.
|
||||||
|
unsigned get_dimension_x() const { return size_x; }
|
||||||
|
/// @brief Returns Ny.
|
||||||
|
unsigned get_dimension_y() const { return size_y; }
|
||||||
|
|
||||||
|
/// @brief Sets offset and scale parameters for normalized texel coordinates when sampling textures.
|
||||||
|
///
|
||||||
|
/// By default, these values are 0.5 / size (samples in the center of texel (0, 0)).
|
||||||
|
/// Scale is 1.0 / size, so it steps one texel for each coordinate in the FFT transform.
|
||||||
|
/// Setting this to something custom is useful to get downsampling with GL_LINEAR -> FFT transform
|
||||||
|
/// without having to downsample the texture first, then FFT.
|
||||||
|
void set_texture_offset_scale(float offset_x, float offset_y, float scale_x, float scale_y)
|
||||||
|
{
|
||||||
|
texture.offset_x = offset_x;
|
||||||
|
texture.offset_y = offset_y;
|
||||||
|
texture.scale_x = scale_x;
|
||||||
|
texture.scale_y = scale_y;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// @brief Set binding range for input.
|
||||||
|
///
|
||||||
|
/// If input is an SSBO, set a custom binding range to be passed to glBindBufferRange.
|
||||||
|
/// By default, the entire buffer is bound.
|
||||||
|
void set_input_buffer_range(size_t offset, size_t size)
|
||||||
|
{
|
||||||
|
ssbo.input.offset = offset;
|
||||||
|
ssbo.input.size = size;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// @brief Set binding range for input_aux.
|
||||||
|
///
|
||||||
|
/// If input_aux is an SSBO, set a custom binding range to be passed to glBindBufferRange.
|
||||||
|
/// By default, the entire buffer is bound.
|
||||||
|
void set_input_aux_buffer_range(size_t offset, size_t size)
|
||||||
|
{
|
||||||
|
ssbo.input_aux.offset = offset;
|
||||||
|
ssbo.input_aux.size = size;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// @brief Set binding range for output.
|
||||||
|
///
|
||||||
|
/// If output buffer is an SSBO, set a custom binding range to be passed to glBindBufferRange.
|
||||||
|
/// By default, the entire buffer is bound.
|
||||||
|
void set_output_buffer_range(size_t offset, size_t size)
|
||||||
|
{
|
||||||
|
ssbo.output.offset = offset;
|
||||||
|
ssbo.output.size = size;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// @brief Set samplers for input textures.
|
||||||
|
///
|
||||||
|
/// Set sampler objects to be used for input and input_aux if textures are used as input.
|
||||||
|
/// By default, sampler object 0 will be used (inheriting sampler parameters from the texture object itself).
|
||||||
|
void set_samplers(Sampler *sampler0, Sampler *sampler1 = nullptr)
|
||||||
|
{
|
||||||
|
texture.samplers[0] = sampler0;
|
||||||
|
texture.samplers[1] = sampler1;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
Context *context;
|
||||||
|
|
||||||
|
struct Pass
|
||||||
|
{
|
||||||
|
Parameters parameters;
|
||||||
|
|
||||||
|
unsigned workgroups_x;
|
||||||
|
unsigned workgroups_y;
|
||||||
|
unsigned uv_scale_x;
|
||||||
|
unsigned stride;
|
||||||
|
Program *program;
|
||||||
|
};
|
||||||
|
|
||||||
|
double cost = 0.0;
|
||||||
|
|
||||||
|
std::unique_ptr<Buffer> temp_buffer;
|
||||||
|
std::unique_ptr<Buffer> temp_buffer_image;
|
||||||
|
std::vector<Pass> passes;
|
||||||
|
std::shared_ptr<ProgramCache> cache;
|
||||||
|
|
||||||
|
std::unique_ptr<Program> build_program(const Parameters ¶ms);
|
||||||
|
static std::string load_shader_string(const char *path);
|
||||||
|
static void store_shader_string(const char *path, const std::string &source);
|
||||||
|
|
||||||
|
Program* get_program(const Parameters ¶ms);
|
||||||
|
|
||||||
|
struct
|
||||||
|
{
|
||||||
|
float offset_x = 0.0f, offset_y = 0.0f, scale_x = 1.0f, scale_y = 1.0f;
|
||||||
|
Sampler *samplers[2] = { nullptr, nullptr };
|
||||||
|
} texture;
|
||||||
|
|
||||||
|
struct
|
||||||
|
{
|
||||||
|
struct
|
||||||
|
{
|
||||||
|
size_t offset = 0;
|
||||||
|
size_t size = 0;
|
||||||
|
} input, input_aux, output;
|
||||||
|
} ssbo;
|
||||||
|
unsigned size_x, size_y;
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
178
glfft/glfft_common.hpp
Normal file
178
glfft/glfft_common.hpp
Normal file
@@ -0,0 +1,178 @@
|
|||||||
|
/* Copyright (C) 2015 Hans-Kristian Arntzen <maister@archlinux.us>
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge,
|
||||||
|
* to any person obtaining a copy of this software and associated documentation files (the "Software"),
|
||||||
|
* to deal in the Software without restriction, including without limitation the rights to
|
||||||
|
* use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
|
||||||
|
* and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
|
||||||
|
* INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||||
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
// For the most part used by the implementation.
|
||||||
|
|
||||||
|
#ifndef GLFFT_COMMON_HPP__
|
||||||
|
#define GLFFT_COMMON_HPP__
|
||||||
|
|
||||||
|
#include "glfft_interface.hpp"
|
||||||
|
#include <functional>
|
||||||
|
#include <cstddef>
|
||||||
|
#include <cstdlib>
|
||||||
|
#include <string>
|
||||||
|
#include <cstring>
|
||||||
|
#include <memory>
|
||||||
|
#include <unordered_map>
|
||||||
|
|
||||||
|
namespace GLFFT
|
||||||
|
{
|
||||||
|
|
||||||
|
enum Direction
|
||||||
|
{
|
||||||
|
/// Forward FFT transform.
|
||||||
|
Forward = -1,
|
||||||
|
/// Inverse FFT transform, but with two inputs (in frequency domain) which are multiplied together
|
||||||
|
/// for convolution.
|
||||||
|
InverseConvolve = 0,
|
||||||
|
/// Inverse FFT transform.
|
||||||
|
Inverse = 1
|
||||||
|
};
|
||||||
|
|
||||||
|
enum Mode
|
||||||
|
{
|
||||||
|
Horizontal,
|
||||||
|
HorizontalDual,
|
||||||
|
Vertical,
|
||||||
|
VerticalDual,
|
||||||
|
|
||||||
|
ResolveRealToComplex,
|
||||||
|
ResolveComplexToReal,
|
||||||
|
};
|
||||||
|
|
||||||
|
enum Type
|
||||||
|
{
|
||||||
|
/// Regular complex-to-complex transform.
|
||||||
|
ComplexToComplex,
|
||||||
|
/// Complex-to-complex dual transform where the complex value is four-dimensional,
|
||||||
|
/// i.e. a vector of two complex values. Typically used to transform RGBA data.
|
||||||
|
ComplexToComplexDual,
|
||||||
|
/// Complex-to-real transform. N / 2 + 1 complex values are used per row with a stride of N complex samples.
|
||||||
|
ComplexToReal,
|
||||||
|
/// Real-to-complex transform. N / 2 + 1 complex output samples are created per row with a stride of N complex samples.
|
||||||
|
RealToComplex
|
||||||
|
};
|
||||||
|
|
||||||
|
enum Target
|
||||||
|
{
|
||||||
|
/// GL_SHADER_STORAGE_BUFFER
|
||||||
|
SSBO,
|
||||||
|
/// Textures, when used as output, type is determined by transform type.
|
||||||
|
/// ComplexToComplex / RealToComplex -> GL_RG16F
|
||||||
|
/// ComplexToComplexDual -> GL_RGBA16F
|
||||||
|
Image,
|
||||||
|
/// Real-valued (single component) textures, when used as output, type is determined by transform type.
|
||||||
|
/// ComplexToReal -> GL_R32F (because GLES 3.1 doesn't have GL_R16F image type).
|
||||||
|
ImageReal
|
||||||
|
};
|
||||||
|
|
||||||
|
struct Parameters
|
||||||
|
{
|
||||||
|
unsigned workgroup_size_x;
|
||||||
|
unsigned workgroup_size_y;
|
||||||
|
unsigned workgroup_size_z;
|
||||||
|
unsigned radix;
|
||||||
|
unsigned vector_size;
|
||||||
|
Direction direction;
|
||||||
|
Mode mode;
|
||||||
|
Target input_target;
|
||||||
|
Target output_target;
|
||||||
|
bool p1;
|
||||||
|
bool shared_banked;
|
||||||
|
bool fft_fp16, input_fp16, output_fp16;
|
||||||
|
bool fft_normalize;
|
||||||
|
|
||||||
|
bool operator==(const Parameters &other) const
|
||||||
|
{
|
||||||
|
return std::memcmp(this, &other, sizeof(Parameters)) == 0;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/// @brief Options for FFT implementation.
|
||||||
|
/// Defaults for performance as conservative.
|
||||||
|
struct FFTOptions
|
||||||
|
{
|
||||||
|
struct Performance
|
||||||
|
{
|
||||||
|
/// Workgroup size used in layout(local_size_x).
|
||||||
|
/// Only affects performance, however, large values may make implementations of smaller sized FFTs impossible.
|
||||||
|
/// FFT constructor will throw in this case.
|
||||||
|
unsigned workgroup_size_x = 4;
|
||||||
|
/// Workgroup size used in layout(local_size_x).
|
||||||
|
/// Only affects performance, however, large values may make implementations of smaller sized FFTs impossible.
|
||||||
|
/// FFT constructor will throw in this case.
|
||||||
|
unsigned workgroup_size_y = 1;
|
||||||
|
/// Vector size. Very GPU dependent. "Scalar" GPUs prefer 2 here, vector GPUs prefer 4 (and maybe 8).
|
||||||
|
unsigned vector_size = 2;
|
||||||
|
/// Whether to use banked shared memory or not.
|
||||||
|
/// Desktop GPUs prefer true here, false for mobile in general.
|
||||||
|
bool shared_banked = false;
|
||||||
|
} performance;
|
||||||
|
|
||||||
|
struct Type
|
||||||
|
{
|
||||||
|
/// Whether internal shader should be mediump float.
|
||||||
|
bool fp16 = false;
|
||||||
|
/// Whether input SSBO is a packed 2xfp16 format. Otherwise, regular FP32.
|
||||||
|
bool input_fp16 = false;
|
||||||
|
/// Whether output SSBO is a packed 2xfp16 format. Otherwise, regular FP32.
|
||||||
|
bool output_fp16 = false;
|
||||||
|
/// Whether to apply 1 / N normalization factor.
|
||||||
|
bool normalize = false;
|
||||||
|
} type;
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
namespace std
|
||||||
|
{
|
||||||
|
template<>
|
||||||
|
struct hash<GLFFT::Parameters>
|
||||||
|
{
|
||||||
|
std::size_t operator()(const GLFFT::Parameters ¶ms) const
|
||||||
|
{
|
||||||
|
std::size_t h = 0;
|
||||||
|
hash<uint8_t> hasher;
|
||||||
|
for (std::size_t i = 0; i < sizeof(GLFFT::Parameters); i++)
|
||||||
|
{
|
||||||
|
h ^= hasher(reinterpret_cast<const uint8_t*>(¶ms)[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
return h;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
namespace GLFFT
|
||||||
|
{
|
||||||
|
|
||||||
|
class ProgramCache
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
Program* find_program(const Parameters ¶meters) const;
|
||||||
|
void insert_program(const Parameters ¶meters, std::unique_ptr<Program> program);
|
||||||
|
size_t cache_size() const { return programs.size(); }
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::unordered_map<Parameters, std::unique_ptr<Program>> programs;
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
6
glfft/glfft_gl_api_headers.hpp
Normal file
6
glfft/glfft_gl_api_headers.hpp
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
|
||||||
|
/* Let GLFFT use GLava's headers */
|
||||||
|
#define GLFFT_GLSL_LANG_STRING "#version 430 core\n"
|
||||||
|
extern "C" {
|
||||||
|
#include "../glava/glad.h"
|
||||||
|
}
|
||||||
310
glfft/glfft_gl_interface.cpp
Normal file
310
glfft/glfft_gl_interface.cpp
Normal file
@@ -0,0 +1,310 @@
|
|||||||
|
/* Copyright (C) 2015 Hans-Kristian Arntzen <maister@archlinux.us>
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge,
|
||||||
|
* to any person obtaining a copy of this software and associated documentation files (the "Software"),
|
||||||
|
* to deal in the Software without restriction, including without limitation the rights to
|
||||||
|
* use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
|
||||||
|
* and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
|
||||||
|
* INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||||
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "glfft_gl_interface.hpp"
|
||||||
|
#ifdef GLFFT_GL_DEBUG
|
||||||
|
#include "glfft_validate.hpp"
|
||||||
|
#endif
|
||||||
|
#include <cstdarg>
|
||||||
|
#include <cstring>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
using namespace GLFFT;
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
GLCommandBuffer GLContext::static_command_buffer;
|
||||||
|
|
||||||
|
void GLCommandBuffer::bind_program(Program *program)
|
||||||
|
{
|
||||||
|
glUseProgram(program ? static_cast<GLProgram*>(program)->name : 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
void GLCommandBuffer::bind_storage_texture(unsigned binding, Texture *texture, Format format)
|
||||||
|
{
|
||||||
|
glBindImageTexture(binding, static_cast<GLTexture*>(texture)->name,
|
||||||
|
0, GL_FALSE, 0, GL_WRITE_ONLY, convert(format));
|
||||||
|
}
|
||||||
|
|
||||||
|
void GLCommandBuffer::bind_texture(unsigned binding, Texture *texture)
|
||||||
|
{
|
||||||
|
glActiveTexture(GL_TEXTURE0 + binding);
|
||||||
|
glBindTexture(GL_TEXTURE_2D, static_cast<GLTexture*>(texture)->name);
|
||||||
|
}
|
||||||
|
|
||||||
|
void GLCommandBuffer::bind_sampler(unsigned binding, Sampler *sampler)
|
||||||
|
{
|
||||||
|
glBindSampler(binding, sampler ? static_cast<GLSampler*>(sampler)->name : 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
void GLCommandBuffer::bind_storage_buffer(unsigned binding, Buffer *buffer)
|
||||||
|
{
|
||||||
|
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, binding, static_cast<GLBuffer*>(buffer)->name);
|
||||||
|
}
|
||||||
|
|
||||||
|
void GLCommandBuffer::bind_storage_buffer_range(unsigned binding, size_t offset, size_t size, Buffer *buffer)
|
||||||
|
{
|
||||||
|
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, static_cast<GLBuffer*>(buffer)->name, offset, size);
|
||||||
|
}
|
||||||
|
|
||||||
|
void GLCommandBuffer::dispatch(unsigned x, unsigned y, unsigned z)
|
||||||
|
{
|
||||||
|
glDispatchCompute(x, y, z);
|
||||||
|
}
|
||||||
|
|
||||||
|
void GLCommandBuffer::barrier(Buffer*)
|
||||||
|
{
|
||||||
|
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
|
||||||
|
}
|
||||||
|
|
||||||
|
void GLCommandBuffer::barrier(Texture*)
|
||||||
|
{
|
||||||
|
glMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT);
|
||||||
|
}
|
||||||
|
|
||||||
|
void GLCommandBuffer::barrier()
|
||||||
|
{
|
||||||
|
glMemoryBarrier(GL_ALL_BARRIER_BITS);
|
||||||
|
}
|
||||||
|
|
||||||
|
void GLCommandBuffer::push_constant_data(unsigned binding, const void *data, size_t size)
|
||||||
|
{
|
||||||
|
glBindBufferBase(GL_UNIFORM_BUFFER, binding, ubos[ubo_index]);
|
||||||
|
void *ptr = glMapBufferRange(GL_UNIFORM_BUFFER,
|
||||||
|
0, CommandBuffer::MaxConstantDataSize,
|
||||||
|
GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT);
|
||||||
|
|
||||||
|
if (ptr)
|
||||||
|
{
|
||||||
|
std::memcpy(ptr, data, size);
|
||||||
|
glUnmapBuffer(GL_UNIFORM_BUFFER);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (++ubo_index >= ubo_count)
|
||||||
|
ubo_index = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
CommandBuffer* GLContext::request_command_buffer()
|
||||||
|
{
|
||||||
|
if (!initialized_ubos)
|
||||||
|
{
|
||||||
|
glGenBuffers(MaxBuffersRing, ubos);
|
||||||
|
for (auto &ubo : ubos)
|
||||||
|
{
|
||||||
|
glBindBuffer(GL_UNIFORM_BUFFER, ubo);
|
||||||
|
glBufferData(GL_UNIFORM_BUFFER, CommandBuffer::MaxConstantDataSize, nullptr, GL_STREAM_DRAW);
|
||||||
|
}
|
||||||
|
static_command_buffer.set_constant_data_buffers(ubos, MaxBuffersRing);
|
||||||
|
initialized_ubos = true;
|
||||||
|
}
|
||||||
|
return &static_command_buffer;
|
||||||
|
}
|
||||||
|
|
||||||
|
void GLContext::submit_command_buffer(CommandBuffer*)
|
||||||
|
{}
|
||||||
|
|
||||||
|
void GLContext::wait_idle()
|
||||||
|
{
|
||||||
|
glFinish();
|
||||||
|
}
|
||||||
|
|
||||||
|
unique_ptr<Texture> GLContext::create_texture(const void *initial_data,
|
||||||
|
unsigned width, unsigned height,
|
||||||
|
Format format)
|
||||||
|
{
|
||||||
|
return unique_ptr<Texture>(new GLTexture(initial_data, width, height, format));
|
||||||
|
}
|
||||||
|
|
||||||
|
unique_ptr<Buffer> GLContext::create_buffer(const void *initial_data, size_t size, AccessMode access)
|
||||||
|
{
|
||||||
|
return unique_ptr<Buffer>(new GLBuffer(initial_data, size, access));
|
||||||
|
}
|
||||||
|
|
||||||
|
unique_ptr<Program> GLContext::compile_compute_shader(const char *source)
|
||||||
|
{
|
||||||
|
#ifdef GLFFT_GL_DEBUG
|
||||||
|
if (!validate_glsl_source(source))
|
||||||
|
return nullptr;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
GLuint program = glCreateProgram();
|
||||||
|
if (!program)
|
||||||
|
{
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
GLuint shader = glCreateShader(GL_COMPUTE_SHADER);
|
||||||
|
|
||||||
|
const char *sources[] = { GLFFT_GLSL_LANG_STRING, source };
|
||||||
|
glShaderSource(shader, 2, sources, NULL);
|
||||||
|
glCompileShader(shader);
|
||||||
|
|
||||||
|
GLint status;
|
||||||
|
glGetShaderiv(shader, GL_COMPILE_STATUS, &status);
|
||||||
|
if (status == GL_FALSE)
|
||||||
|
{
|
||||||
|
GLint len;
|
||||||
|
GLsizei out_len;
|
||||||
|
|
||||||
|
glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &len);
|
||||||
|
vector<char> buf(len);
|
||||||
|
glGetShaderInfoLog(shader, len, &out_len, buf.data());
|
||||||
|
log("GLFFT: Shader log:\n%s\n\n", buf.data());
|
||||||
|
|
||||||
|
glDeleteShader(shader);
|
||||||
|
glDeleteProgram(program);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
glAttachShader(program, shader);
|
||||||
|
glLinkProgram(program);
|
||||||
|
glDeleteShader(shader);
|
||||||
|
|
||||||
|
glGetProgramiv(program, GL_LINK_STATUS, &status);
|
||||||
|
if (status == GL_FALSE)
|
||||||
|
{
|
||||||
|
GLint len;
|
||||||
|
GLsizei out_len;
|
||||||
|
glGetProgramiv(program, GL_INFO_LOG_LENGTH, &len);
|
||||||
|
vector<char> buf(len);
|
||||||
|
glGetProgramInfoLog(program, len, &out_len, buf.data());
|
||||||
|
log("Program log:\n%s\n\n", buf.data());
|
||||||
|
|
||||||
|
glDeleteProgram(program);
|
||||||
|
glDeleteShader(shader);
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
return unique_ptr<Program>(new GLProgram(program));
|
||||||
|
}
|
||||||
|
|
||||||
|
void GLContext::log(const char *fmt, ...)
|
||||||
|
{
|
||||||
|
char buffer[4 * 1024];
|
||||||
|
|
||||||
|
va_list va;
|
||||||
|
va_start(va, fmt);
|
||||||
|
vsnprintf(buffer, sizeof(buffer), fmt, va);
|
||||||
|
va_end(va);
|
||||||
|
glfft_log("%s", buffer);
|
||||||
|
}
|
||||||
|
|
||||||
|
double GLContext::get_time()
|
||||||
|
{
|
||||||
|
return glfft_time();
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned GLContext::get_max_work_group_threads()
|
||||||
|
{
|
||||||
|
GLint value;
|
||||||
|
glGetIntegerv(GL_MAX_COMPUTE_WORK_GROUP_INVOCATIONS, &value);
|
||||||
|
return value;
|
||||||
|
}
|
||||||
|
|
||||||
|
const char* GLContext::get_renderer_string()
|
||||||
|
{
|
||||||
|
return reinterpret_cast<const char*>(glGetString(GL_RENDERER));
|
||||||
|
}
|
||||||
|
|
||||||
|
const void* GLContext::map(Buffer *buffer, size_t offset, size_t size)
|
||||||
|
{
|
||||||
|
glBindBuffer(GL_SHADER_STORAGE_BUFFER, static_cast<GLBuffer*>(buffer)->name);
|
||||||
|
const void *ptr = glMapBufferRange(GL_SHADER_STORAGE_BUFFER, offset, size, GL_MAP_READ_BIT);
|
||||||
|
glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
|
||||||
|
return ptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
void GLContext::unmap(Buffer *buffer)
|
||||||
|
{
|
||||||
|
glBindBuffer(GL_SHADER_STORAGE_BUFFER, static_cast<GLBuffer*>(buffer)->name);
|
||||||
|
glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
|
||||||
|
glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
void GLContext::teardown()
|
||||||
|
{
|
||||||
|
if (initialized_ubos)
|
||||||
|
glDeleteBuffers(MaxBuffersRing, ubos);
|
||||||
|
initialized_ubos = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
GLContext::~GLContext()
|
||||||
|
{
|
||||||
|
teardown();
|
||||||
|
}
|
||||||
|
|
||||||
|
GLTexture::GLTexture(const void *initial_data,
|
||||||
|
unsigned width, unsigned height,
|
||||||
|
Format format)
|
||||||
|
{
|
||||||
|
glGenTextures(1, &name);
|
||||||
|
glBindTexture(GL_TEXTURE_2D, name);
|
||||||
|
glTexStorage2D(GL_TEXTURE_2D, 1, convert(format), width, height);
|
||||||
|
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
|
||||||
|
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
|
||||||
|
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
|
||||||
|
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
|
||||||
|
|
||||||
|
if (initial_data)
|
||||||
|
{
|
||||||
|
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, width, height,
|
||||||
|
convert_format(format), convert_type(format), initial_data);
|
||||||
|
}
|
||||||
|
|
||||||
|
glBindTexture(GL_TEXTURE_2D, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
GLTexture::~GLTexture()
|
||||||
|
{
|
||||||
|
if (owned)
|
||||||
|
glDeleteTextures(1, &name);
|
||||||
|
}
|
||||||
|
|
||||||
|
GLBuffer::GLBuffer(const void *initial_data, size_t size, AccessMode access)
|
||||||
|
{
|
||||||
|
glGenBuffers(1, &name);
|
||||||
|
glBindBuffer(GL_SHADER_STORAGE_BUFFER, name);
|
||||||
|
glBufferData(GL_SHADER_STORAGE_BUFFER, size, initial_data, convert(access));
|
||||||
|
glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
GLBuffer::~GLBuffer()
|
||||||
|
{
|
||||||
|
if (owned)
|
||||||
|
glDeleteBuffers(1, &name);
|
||||||
|
}
|
||||||
|
|
||||||
|
GLProgram::GLProgram(GLuint name)
|
||||||
|
: name(name)
|
||||||
|
{}
|
||||||
|
|
||||||
|
GLProgram::~GLProgram()
|
||||||
|
{
|
||||||
|
if (name != 0)
|
||||||
|
{
|
||||||
|
glDeleteProgram(name);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
GLSampler::~GLSampler()
|
||||||
|
{
|
||||||
|
if (name != 0)
|
||||||
|
{
|
||||||
|
glDeleteSamplers(1, &name);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
258
glfft/glfft_gl_interface.hpp
Normal file
258
glfft/glfft_gl_interface.hpp
Normal file
@@ -0,0 +1,258 @@
|
|||||||
|
/* Copyright (C) 2015 Hans-Kristian Arntzen <maister@archlinux.us>
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge,
|
||||||
|
* to any person obtaining a copy of this software and associated documentation files (the "Software"),
|
||||||
|
* to deal in the Software without restriction, including without limitation the rights to
|
||||||
|
* use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
|
||||||
|
* and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
|
||||||
|
* INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||||
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef GLFFT_GL_INTERFACE_HPP__
|
||||||
|
#define GLFFT_GL_INTERFACE_HPP__
|
||||||
|
|
||||||
|
#include "glfft_interface.hpp"
|
||||||
|
|
||||||
|
#include "glfft_gl_api_headers.hpp"
|
||||||
|
|
||||||
|
/* GLava additions (POSIX) */
|
||||||
|
extern "C" {
|
||||||
|
#include <time.h>
|
||||||
|
#include <stdarg.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <error.h>
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifndef GLFFT_GLSL_LANG_STRING
|
||||||
|
#error GLFFT_GLSL_LANG_STRING must be defined to e.g. "#version 310 es\n" or "#version 430 core\n".
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef GLFFT_LOG_OVERRIDE
|
||||||
|
void glfft_log(const char *fmt, ...) {
|
||||||
|
va_list l;
|
||||||
|
va_start(l, fmt);
|
||||||
|
vfprintf(stdout, fmt, l);
|
||||||
|
va_end(l);
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
#define glfft_log GLFFT_LOG_OVERRIDE
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef GLFFT_TIME_OVERRIDE
|
||||||
|
double glfft_time() {
|
||||||
|
struct timespec tv;
|
||||||
|
if (clock_gettime(CLOCK_REALTIME, &tv)) {
|
||||||
|
fprintf(stderr, "clock_gettime(CLOCK_REALTIME, ...): %s\n", strerror(errno));
|
||||||
|
}
|
||||||
|
return (double) tv.tv_sec + ((double) tv.tv_nsec / 1000000000.0);
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
#define glfft_time GLFFT_TIME_OVERRIDE
|
||||||
|
#endif
|
||||||
|
|
||||||
|
namespace GLFFT
|
||||||
|
{
|
||||||
|
class GLContext;
|
||||||
|
|
||||||
|
class GLTexture : public Texture
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
friend class GLContext;
|
||||||
|
friend class GLCommandBuffer;
|
||||||
|
~GLTexture();
|
||||||
|
|
||||||
|
GLTexture(GLuint obj) : name(obj), owned(false) {}
|
||||||
|
GLuint get() const { return name; }
|
||||||
|
|
||||||
|
private:
|
||||||
|
GLTexture(const void *initial_data,
|
||||||
|
unsigned width, unsigned height,
|
||||||
|
Format format);
|
||||||
|
GLuint name;
|
||||||
|
bool owned = true;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Not really used by test and bench code, but can be useful for API users.
|
||||||
|
class GLSampler : public Sampler
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
friend class GLContext;
|
||||||
|
friend class GLCommandBuffer;
|
||||||
|
~GLSampler();
|
||||||
|
|
||||||
|
GLSampler(GLuint obj) : name(obj) {}
|
||||||
|
GLuint get() const { return name; }
|
||||||
|
|
||||||
|
private:
|
||||||
|
GLuint name;
|
||||||
|
};
|
||||||
|
|
||||||
|
class GLBuffer : public Buffer
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
friend class GLContext;
|
||||||
|
friend class GLCommandBuffer;
|
||||||
|
~GLBuffer();
|
||||||
|
|
||||||
|
GLBuffer(GLuint obj) : name(obj), owned(false) {}
|
||||||
|
GLuint get() const { return name; }
|
||||||
|
|
||||||
|
private:
|
||||||
|
GLuint name;
|
||||||
|
GLBuffer(const void *initial_data, size_t size, AccessMode access);
|
||||||
|
bool owned = true;
|
||||||
|
};
|
||||||
|
|
||||||
|
class GLProgram : public Program
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
friend class GLContext;
|
||||||
|
friend class GLCommandBuffer;
|
||||||
|
~GLProgram();
|
||||||
|
|
||||||
|
GLuint get() const { return name; }
|
||||||
|
|
||||||
|
private:
|
||||||
|
GLProgram(GLuint name);
|
||||||
|
GLuint name;
|
||||||
|
};
|
||||||
|
|
||||||
|
class GLCommandBuffer : public CommandBuffer
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
~GLCommandBuffer() = default;
|
||||||
|
|
||||||
|
void set_constant_data_buffers(const GLuint *ubos, unsigned count)
|
||||||
|
{
|
||||||
|
this->ubos = ubos;
|
||||||
|
ubo_index = 0;
|
||||||
|
ubo_count = count;
|
||||||
|
}
|
||||||
|
|
||||||
|
void bind_program(Program *program) override;
|
||||||
|
void bind_storage_texture(unsigned binding, Texture *texture, Format format) override;
|
||||||
|
void bind_texture(unsigned binding, Texture *texture) override;
|
||||||
|
void bind_sampler(unsigned binding, Sampler *sampler) override;
|
||||||
|
void bind_storage_buffer(unsigned binding, Buffer *texture) override;
|
||||||
|
void bind_storage_buffer_range(unsigned binding, size_t offset, size_t length, Buffer *texture) override;
|
||||||
|
void dispatch(unsigned x, unsigned y, unsigned z) override;
|
||||||
|
|
||||||
|
void barrier(Buffer *buffer) override;
|
||||||
|
void barrier(Texture *buffer) override;
|
||||||
|
void barrier() override;
|
||||||
|
|
||||||
|
void push_constant_data(unsigned binding, const void *data, size_t size) override;
|
||||||
|
|
||||||
|
private:
|
||||||
|
const GLuint *ubos = nullptr;
|
||||||
|
unsigned ubo_count = 0;
|
||||||
|
unsigned ubo_index = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
class GLContext : public Context
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
~GLContext();
|
||||||
|
|
||||||
|
std::unique_ptr<Texture> create_texture(const void *initial_data,
|
||||||
|
unsigned width, unsigned height,
|
||||||
|
Format format) override;
|
||||||
|
|
||||||
|
std::unique_ptr<Buffer> create_buffer(const void *initial_data, size_t size, AccessMode access) override;
|
||||||
|
std::unique_ptr<Program> compile_compute_shader(const char *source) override;
|
||||||
|
|
||||||
|
CommandBuffer* request_command_buffer() override;
|
||||||
|
void submit_command_buffer(CommandBuffer *cmd) override;
|
||||||
|
void wait_idle() override;
|
||||||
|
|
||||||
|
const char* get_renderer_string() override;
|
||||||
|
void log(const char *fmt, ...) override;
|
||||||
|
double get_time() override;
|
||||||
|
|
||||||
|
unsigned get_max_work_group_threads() override;
|
||||||
|
|
||||||
|
const void* map(Buffer *buffer, size_t offset, size_t size) override;
|
||||||
|
void unmap(Buffer *buffer) override;
|
||||||
|
|
||||||
|
// Not supported in GLES, so override when creating platform-specific context.
|
||||||
|
bool supports_texture_readback() override { return false; }
|
||||||
|
void read_texture(void*, Texture*, Format) override {}
|
||||||
|
|
||||||
|
protected:
|
||||||
|
void teardown();
|
||||||
|
|
||||||
|
private:
|
||||||
|
static GLCommandBuffer static_command_buffer;
|
||||||
|
|
||||||
|
enum { MaxBuffersRing = 256 };
|
||||||
|
GLuint ubos[MaxBuffersRing];
|
||||||
|
bool initialized_ubos = false;
|
||||||
|
};
|
||||||
|
|
||||||
|
static inline GLenum convert(AccessMode mode)
|
||||||
|
{
|
||||||
|
switch (mode)
|
||||||
|
{
|
||||||
|
case AccessStreamCopy: return GL_STREAM_COPY;
|
||||||
|
case AccessStaticCopy: return GL_STATIC_COPY;
|
||||||
|
case AccessStreamRead: return GL_STREAM_READ;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline GLenum convert(Format format)
|
||||||
|
{
|
||||||
|
switch (format)
|
||||||
|
{
|
||||||
|
case FormatR16G16B16A16Float: return GL_RGBA16F;
|
||||||
|
case FormatR32G32B32A32Float: return GL_RGBA32F;
|
||||||
|
case FormatR32Float: return GL_R32F;
|
||||||
|
case FormatR16G16Float: return GL_RG16F;
|
||||||
|
case FormatR32G32Float: return GL_RG32F;
|
||||||
|
case FormatR32Uint: return GL_R32UI;
|
||||||
|
case FormatUnknown: return 0;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline GLenum convert_format(Format format)
|
||||||
|
{
|
||||||
|
switch (format)
|
||||||
|
{
|
||||||
|
case FormatR16G16Float: return GL_RG;
|
||||||
|
case FormatR32G32Float: return GL_RG;
|
||||||
|
case FormatR16G16B16A16Float: return GL_RGBA;
|
||||||
|
case FormatR32G32B32A32Float: return GL_RGBA;
|
||||||
|
case FormatR32Float: return GL_RED;
|
||||||
|
case FormatR32Uint: return GL_RED_INTEGER;
|
||||||
|
case FormatUnknown: return 0;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline GLenum convert_type(Format format)
|
||||||
|
{
|
||||||
|
switch (format)
|
||||||
|
{
|
||||||
|
case FormatR16G16Float: return GL_HALF_FLOAT;
|
||||||
|
case FormatR16G16B16A16Float: return GL_HALF_FLOAT;
|
||||||
|
case FormatR32Float: return GL_FLOAT;
|
||||||
|
case FormatR32G32Float: return GL_FLOAT;
|
||||||
|
case FormatR32G32B32A32Float: return GL_FLOAT;
|
||||||
|
case FormatR32Uint: return GL_UNSIGNED_INT;
|
||||||
|
case FormatUnknown: return 0;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
131
glfft/glfft_interface.hpp
Normal file
131
glfft/glfft_interface.hpp
Normal file
@@ -0,0 +1,131 @@
|
|||||||
|
/* Copyright (C) 2015 Hans-Kristian Arntzen <maister@archlinux.us>
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge,
|
||||||
|
* to any person obtaining a copy of this software and associated documentation files (the "Software"),
|
||||||
|
* to deal in the Software without restriction, including without limitation the rights to
|
||||||
|
* use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
|
||||||
|
* and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
|
||||||
|
* INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||||
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef GLFFT_INTERFACE_HPP__
|
||||||
|
#define GLFFT_INTERFACE_HPP__
|
||||||
|
|
||||||
|
#include <memory>
|
||||||
|
|
||||||
|
namespace GLFFT
|
||||||
|
{
|
||||||
|
class Context;
|
||||||
|
|
||||||
|
class Resource
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
virtual ~Resource() = default;
|
||||||
|
|
||||||
|
// Non-movable, non-copyable to make things simpler.
|
||||||
|
Resource(Resource&&) = delete;
|
||||||
|
void operator=(const Resource&) = delete;
|
||||||
|
|
||||||
|
protected:
|
||||||
|
Resource() = default;
|
||||||
|
};
|
||||||
|
|
||||||
|
class Texture : public Resource {};
|
||||||
|
class Sampler : public Resource {};
|
||||||
|
class Buffer : public Resource {};
|
||||||
|
|
||||||
|
class Program
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
virtual ~Program() = default;
|
||||||
|
protected:
|
||||||
|
friend class Context;
|
||||||
|
Program() = default;
|
||||||
|
};
|
||||||
|
|
||||||
|
enum AccessMode
|
||||||
|
{
|
||||||
|
AccessStreamCopy,
|
||||||
|
AccessStaticCopy,
|
||||||
|
AccessStreamRead
|
||||||
|
};
|
||||||
|
|
||||||
|
enum Format
|
||||||
|
{
|
||||||
|
FormatUnknown,
|
||||||
|
FormatR16G16B16A16Float,
|
||||||
|
FormatR32G32B32A32Float,
|
||||||
|
FormatR32G32Float,
|
||||||
|
FormatR32Float,
|
||||||
|
FormatR16G16Float,
|
||||||
|
FormatR32Uint
|
||||||
|
};
|
||||||
|
|
||||||
|
class CommandBuffer;
|
||||||
|
|
||||||
|
class Context
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
virtual ~Context() = default;
|
||||||
|
|
||||||
|
virtual std::unique_ptr<Texture> create_texture(const void *initial_data,
|
||||||
|
unsigned width, unsigned height,
|
||||||
|
Format format) = 0;
|
||||||
|
|
||||||
|
virtual std::unique_ptr<Buffer> create_buffer(const void *initial_data, size_t size, AccessMode access) = 0;
|
||||||
|
virtual std::unique_ptr<Program> compile_compute_shader(const char *source) = 0;
|
||||||
|
|
||||||
|
virtual CommandBuffer* request_command_buffer() = 0;
|
||||||
|
virtual void submit_command_buffer(CommandBuffer *cmd) = 0;
|
||||||
|
virtual void wait_idle() = 0;
|
||||||
|
|
||||||
|
virtual const char* get_renderer_string() = 0;
|
||||||
|
virtual void log(const char *fmt, ...) = 0;
|
||||||
|
virtual double get_time() = 0;
|
||||||
|
|
||||||
|
virtual unsigned get_max_work_group_threads() = 0;
|
||||||
|
|
||||||
|
virtual const void* map(Buffer *buffer, size_t offset, size_t size) = 0;
|
||||||
|
virtual void unmap(Buffer *buffer) = 0;
|
||||||
|
|
||||||
|
virtual bool supports_texture_readback() = 0;
|
||||||
|
virtual void read_texture(void *buffer, Texture *texture, Format format) = 0;
|
||||||
|
|
||||||
|
protected:
|
||||||
|
Context() = default;
|
||||||
|
};
|
||||||
|
|
||||||
|
class CommandBuffer
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
virtual ~CommandBuffer() = default;
|
||||||
|
|
||||||
|
virtual void bind_program(Program *program) = 0;
|
||||||
|
virtual void bind_storage_texture(unsigned binding, Texture *texture, Format format) = 0;
|
||||||
|
virtual void bind_texture(unsigned binding, Texture *texture) = 0;
|
||||||
|
virtual void bind_sampler(unsigned binding, Sampler *sampler) = 0;
|
||||||
|
virtual void bind_storage_buffer(unsigned binding, Buffer *texture) = 0;
|
||||||
|
virtual void bind_storage_buffer_range(unsigned binding, size_t offset, size_t length, Buffer *texture) = 0;
|
||||||
|
virtual void dispatch(unsigned x, unsigned y, unsigned z) = 0;
|
||||||
|
|
||||||
|
virtual void barrier(Buffer *buffer) = 0;
|
||||||
|
virtual void barrier(Texture *buffer) = 0;
|
||||||
|
virtual void barrier() = 0;
|
||||||
|
|
||||||
|
enum { MaxConstantDataSize = 64 };
|
||||||
|
virtual void push_constant_data(unsigned binding, const void *data, size_t size) = 0;
|
||||||
|
|
||||||
|
protected:
|
||||||
|
CommandBuffer() = default;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
600
glfft/glfft_wisdom.cpp
Normal file
600
glfft/glfft_wisdom.cpp
Normal file
@@ -0,0 +1,600 @@
|
|||||||
|
/* Copyright (C) 2015 Hans-Kristian Arntzen <maister@archlinux.us>
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge,
|
||||||
|
* to any person obtaining a copy of this software and associated documentation files (the "Software"),
|
||||||
|
* to deal in the Software without restriction, including without limitation the rights to
|
||||||
|
* use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
|
||||||
|
* and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
|
||||||
|
* INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||||
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "glfft_wisdom.hpp"
|
||||||
|
#include "glfft_interface.hpp"
|
||||||
|
#include "glfft.hpp"
|
||||||
|
#include <utility>
|
||||||
|
|
||||||
|
/* GLAVA NOTICE: automatic wisdom serialization support may be added at a late date */
|
||||||
|
#ifdef GLFFT_SERIALIZATION
|
||||||
|
#include "rapidjson/reader.h"
|
||||||
|
#include "rapidjson/prettywriter.h"
|
||||||
|
#include "rapidjson/stringbuffer.h"
|
||||||
|
#include "rapidjson/document.h"
|
||||||
|
using namespace rapidjson;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef GLFFT_CLI_ASYNC
|
||||||
|
#include "glfft_cli.hpp"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
using namespace GLFFT;
|
||||||
|
|
||||||
|
FFTStaticWisdom FFTWisdom::get_static_wisdom_from_renderer(Context *context)
|
||||||
|
{
|
||||||
|
FFTStaticWisdom res;
|
||||||
|
|
||||||
|
const char *renderer = context->get_renderer_string();
|
||||||
|
unsigned threads = context->get_max_work_group_threads();
|
||||||
|
|
||||||
|
if (strstr(renderer, "GeForce") || strstr(renderer, "Quadro"))
|
||||||
|
{
|
||||||
|
context->log("Detected GeForce/Quadro GPU.\n");
|
||||||
|
res.min_workgroup_size = 32; // Warp threads.
|
||||||
|
res.min_workgroup_size_shared = 32;
|
||||||
|
res.max_workgroup_size = min(threads, 256u); // Very unlikely that more than 256 threads will do anything good.
|
||||||
|
res.min_vector_size = 2;
|
||||||
|
res.max_vector_size = 2;
|
||||||
|
res.shared_banked = FFTStaticWisdom::True;
|
||||||
|
}
|
||||||
|
else if (strstr(renderer, "Radeon"))
|
||||||
|
{
|
||||||
|
context->log("Detected Radeon GPU.\n");
|
||||||
|
res.min_workgroup_size = 64; // Wavefront threads (GCN).
|
||||||
|
res.min_workgroup_size_shared = 128;
|
||||||
|
res.max_workgroup_size = min(threads, 256u); // Very unlikely that more than 256 threads will do anything good.
|
||||||
|
// TODO: Find if we can restrict this to 2 or 4 always.
|
||||||
|
res.min_vector_size = 2;
|
||||||
|
res.max_vector_size = 4;
|
||||||
|
res.shared_banked = FFTStaticWisdom::True;
|
||||||
|
}
|
||||||
|
else if (strstr(renderer, "Mali"))
|
||||||
|
{
|
||||||
|
context->log("Detected Mali GPU.\n");
|
||||||
|
|
||||||
|
res.min_workgroup_size = 4;
|
||||||
|
res.min_workgroup_size_shared = 4;
|
||||||
|
res.max_workgroup_size = 64; // Going beyond 64 threads per WG is not a good idea.
|
||||||
|
res.min_vector_size = 4;
|
||||||
|
res.max_vector_size = 4;
|
||||||
|
res.shared_banked = FFTStaticWisdom::False;
|
||||||
|
}
|
||||||
|
// TODO: Add more GPUs.
|
||||||
|
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
pair<double, FFTOptions::Performance> FFTWisdom::learn_optimal_options(
|
||||||
|
Context *context, unsigned Nx, unsigned Ny, unsigned radix,
|
||||||
|
Mode mode, Target input_target, Target output_target,
|
||||||
|
const FFTOptions::Type &type)
|
||||||
|
{
|
||||||
|
WisdomPass pass = {
|
||||||
|
{
|
||||||
|
Nx, Ny, radix, mode, input_target, output_target,
|
||||||
|
type,
|
||||||
|
},
|
||||||
|
0.0,
|
||||||
|
};
|
||||||
|
|
||||||
|
auto itr = library.find(pass);
|
||||||
|
if (itr != end(library))
|
||||||
|
{
|
||||||
|
return make_pair(itr->first.cost, itr->second);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
auto result = study(context, pass, type);
|
||||||
|
pass.cost = result.first;
|
||||||
|
library[pass] = result.second;
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void FFTWisdom::learn_optimal_options_exhaustive(Context *context,
|
||||||
|
unsigned Nx, unsigned Ny,
|
||||||
|
Type type, Target input_target, Target output_target, const FFTOptions::Type &fft_type)
|
||||||
|
{
|
||||||
|
bool learn_resolve = type == ComplexToReal || type == RealToComplex;
|
||||||
|
Mode vertical_mode = type == ComplexToComplexDual ? VerticalDual : Vertical;
|
||||||
|
Mode horizontal_mode = type == ComplexToComplexDual ? HorizontalDual : Horizontal;
|
||||||
|
|
||||||
|
// Create wisdom for horizontal transforms and vertical transform.
|
||||||
|
static const unsigned radices[] = { 4, 8, 16, 64 };
|
||||||
|
for (auto radix : radices)
|
||||||
|
{
|
||||||
|
try
|
||||||
|
{
|
||||||
|
// If we're doing SSBO -> Image or Image -> SSBO. Create wisdom for the two variants.
|
||||||
|
|
||||||
|
// Learn plain transforms.
|
||||||
|
if (Ny > 1)
|
||||||
|
{
|
||||||
|
learn_optimal_options(context, Nx >> learn_resolve, Ny, radix, vertical_mode, SSBO, SSBO, fft_type);
|
||||||
|
}
|
||||||
|
learn_optimal_options(context, Nx >> learn_resolve, Ny, radix, horizontal_mode, SSBO, SSBO, fft_type);
|
||||||
|
|
||||||
|
// Learn the first/last pass transforms. Can be fairly significant since accessing textures makes more sense with
|
||||||
|
// block interleave and larger WG_Y sizes.
|
||||||
|
if (input_target != SSBO)
|
||||||
|
{
|
||||||
|
if (Ny > 1)
|
||||||
|
{
|
||||||
|
learn_optimal_options(context, Nx >> learn_resolve, Ny, radix, vertical_mode, input_target, SSBO, fft_type);
|
||||||
|
}
|
||||||
|
learn_optimal_options(context, Nx >> learn_resolve, Ny, radix, horizontal_mode, input_target, SSBO, fft_type);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (output_target != SSBO)
|
||||||
|
{
|
||||||
|
if (Ny > 1)
|
||||||
|
{
|
||||||
|
learn_optimal_options(context, Nx >> learn_resolve, Ny, radix, vertical_mode, SSBO, output_target, fft_type);
|
||||||
|
}
|
||||||
|
learn_optimal_options(context, Nx >> learn_resolve, Ny, radix, horizontal_mode, SSBO, output_target, fft_type);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#ifdef GLFFT_CLI_ASYNC
|
||||||
|
catch (const AsyncCancellation &)
|
||||||
|
{
|
||||||
|
throw;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
catch (...)
|
||||||
|
{
|
||||||
|
// If our default options cannot successfully create the radix pass (i.e. throws),
|
||||||
|
// just ignore it for purpose of creating wisdom.
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
auto resolve_type = fft_type;
|
||||||
|
resolve_type.input_fp16 = resolve_type.output_fp16;
|
||||||
|
Mode resolve_mode = type == ComplexToReal ? ResolveComplexToReal : ResolveRealToComplex;
|
||||||
|
Target resolve_input_target = SSBO;
|
||||||
|
|
||||||
|
// If we have C2R Nx1 transform, the first pass is resolve, so use those types.
|
||||||
|
if (type == ComplexToReal && Ny == 1)
|
||||||
|
{
|
||||||
|
resolve_type = fft_type;
|
||||||
|
resolve_input_target = input_target;
|
||||||
|
}
|
||||||
|
|
||||||
|
// If we need to do a resolve pass, train this case as well.
|
||||||
|
if (learn_resolve)
|
||||||
|
{
|
||||||
|
try
|
||||||
|
{
|
||||||
|
// If Ny == 1 and we're doing RealToComplex, this will be the last pass, so use output_target as target.
|
||||||
|
if (Ny == 1 && resolve_mode == ResolveRealToComplex)
|
||||||
|
{
|
||||||
|
learn_optimal_options(context, Nx >> learn_resolve, Ny, 2, resolve_mode, resolve_input_target, output_target, resolve_type);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
learn_optimal_options(context, Nx >> learn_resolve, Ny, 2, resolve_mode, resolve_input_target, SSBO, resolve_type);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#ifdef GLFFT_CLI_ASYNC
|
||||||
|
catch (const AsyncCancellation &)
|
||||||
|
{
|
||||||
|
throw;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
catch (...)
|
||||||
|
{
|
||||||
|
// If our default options cannot successfully create the radix pass (i.e. throws),
|
||||||
|
// just ignore it for purpose of creating wisdom.
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
double FFTWisdom::bench(Context *context, Resource *output, Resource *input,
|
||||||
|
const WisdomPass &pass, const FFTOptions &options, const shared_ptr<ProgramCache> &cache) const
|
||||||
|
{
|
||||||
|
FFT fft(context, pass.pass.Nx, pass.pass.Ny, pass.pass.radix, pass.pass.input_target != SSBO ? 1 : pass.pass.radix,
|
||||||
|
pass.pass.mode, pass.pass.input_target, pass.pass.output_target,
|
||||||
|
cache, options);
|
||||||
|
|
||||||
|
return fft.bench(context,
|
||||||
|
output, input, params.warmup, params.iterations, params.dispatches, params.timeout);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline unsigned mode_to_size(Mode mode)
|
||||||
|
{
|
||||||
|
switch (mode)
|
||||||
|
{
|
||||||
|
case VerticalDual:
|
||||||
|
case HorizontalDual:
|
||||||
|
case ResolveRealToComplex:
|
||||||
|
case ResolveComplexToReal:
|
||||||
|
return 4;
|
||||||
|
|
||||||
|
default:
|
||||||
|
return 2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::pair<double, FFTOptions::Performance> FFTWisdom::study(Context *context, const WisdomPass &pass, FFTOptions::Type type) const
|
||||||
|
{
|
||||||
|
auto cache = make_shared<ProgramCache>();
|
||||||
|
|
||||||
|
unique_ptr<Resource> output;
|
||||||
|
unique_ptr<Resource> input;
|
||||||
|
|
||||||
|
unsigned mode_size = mode_to_size(pass.pass.mode);
|
||||||
|
vector<float> tmp(mode_size * pass.pass.Nx * pass.pass.Ny);
|
||||||
|
|
||||||
|
if (pass.pass.input_target == SSBO)
|
||||||
|
{
|
||||||
|
input = context->create_buffer(tmp.data(), tmp.size() * sizeof(float) >> type.input_fp16, AccessStaticCopy);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
Format format = FormatUnknown;
|
||||||
|
unsigned Nx = pass.pass.Nx;
|
||||||
|
unsigned Ny = pass.pass.Ny;
|
||||||
|
|
||||||
|
switch (pass.pass.mode)
|
||||||
|
{
|
||||||
|
case VerticalDual:
|
||||||
|
case HorizontalDual:
|
||||||
|
format = FormatR32G32B32A32Float;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case Vertical:
|
||||||
|
case Horizontal:
|
||||||
|
format = FormatR32G32Float;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case ResolveComplexToReal:
|
||||||
|
format = FormatR32G32Float;
|
||||||
|
Nx *= 2;
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
throw logic_error("Invalid input mode.\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
input = context->create_texture(tmp.data(), Nx, Ny, format);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (pass.pass.output_target == SSBO)
|
||||||
|
{
|
||||||
|
output = context->create_buffer(nullptr, tmp.size() * sizeof(float) >> type.output_fp16, AccessStreamCopy);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
Format format = FormatUnknown;
|
||||||
|
unsigned Nx = pass.pass.Nx;
|
||||||
|
unsigned Ny = pass.pass.Ny;
|
||||||
|
|
||||||
|
switch (pass.pass.mode)
|
||||||
|
{
|
||||||
|
case VerticalDual:
|
||||||
|
case HorizontalDual:
|
||||||
|
format = FormatR32G32B32A32Float;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case Vertical:
|
||||||
|
case Horizontal:
|
||||||
|
format = FormatR32G32Float;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case ResolveRealToComplex:
|
||||||
|
format = FormatR32G32Float;
|
||||||
|
Nx *= 2;
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
throw logic_error("Invalid output mode.\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
output = context->create_texture(nullptr, Nx, Ny, format);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Exhaustive search, look for every sensible combination, and find fastest parameters.
|
||||||
|
// Get initial best cost with defaults.
|
||||||
|
FFTOptions::Performance best_perf;
|
||||||
|
double minimum_cost = bench(context, output.get(), input.get(), pass, { best_perf, type }, cache);
|
||||||
|
|
||||||
|
static const FFTStaticWisdom::Tristate shared_banked_values[] = { FFTStaticWisdom::False, FFTStaticWisdom::True };
|
||||||
|
static const unsigned vector_size_values[] = { 2, 4, 8 };
|
||||||
|
static const unsigned workgroup_size_x_values[] = { 4, 8, 16, 32, 64, 128, 256 };
|
||||||
|
static const unsigned workgroup_size_y_values[] = { 1, 2, 4, 8, };
|
||||||
|
|
||||||
|
bool test_resolve = pass.pass.mode == ResolveComplexToReal || pass.pass.mode == ResolveRealToComplex;
|
||||||
|
bool test_dual = pass.pass.mode == VerticalDual || pass.pass.mode == HorizontalDual;
|
||||||
|
unsigned bench_count = 0;
|
||||||
|
|
||||||
|
for (auto shared_banked : shared_banked_values)
|
||||||
|
{
|
||||||
|
// Useless test, since shared banked is only relevant for radix 16/64.
|
||||||
|
if (pass.pass.radix < 16 && shared_banked)
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool fair_shared_banked = (pass.pass.radix < 16) ||
|
||||||
|
(static_wisdom.shared_banked == FFTStaticWisdom::DontCare) ||
|
||||||
|
(shared_banked == static_wisdom.shared_banked);
|
||||||
|
|
||||||
|
if (!fair_shared_banked)
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (auto vector_size : vector_size_values)
|
||||||
|
{
|
||||||
|
// Resolve passes currently only support vector size 2. Shared banked makes no sense either.
|
||||||
|
if (test_resolve && (vector_size != 2 || shared_banked))
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// We can only use vector_size 8 with FP16.
|
||||||
|
if (vector_size == 8 && (!type.fp16 || !type.input_fp16 || !type.output_fp16))
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Makes little sense to test since since vector_size will be bumped to 4 anyways.
|
||||||
|
if (test_dual && vector_size < 4)
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (auto workgroup_size_x : workgroup_size_x_values)
|
||||||
|
{
|
||||||
|
for (auto workgroup_size_y : workgroup_size_y_values)
|
||||||
|
{
|
||||||
|
unsigned workgroup_size = workgroup_size_x * workgroup_size_y;
|
||||||
|
|
||||||
|
unsigned min_workgroup_size = pass.pass.radix >= 16 ? static_wisdom.min_workgroup_size_shared :
|
||||||
|
static_wisdom.min_workgroup_size;
|
||||||
|
|
||||||
|
unsigned min_vector_size = test_dual ? max(4u, static_wisdom.min_vector_size) : static_wisdom.min_vector_size;
|
||||||
|
unsigned max_vector_size = test_dual ? max(4u, static_wisdom.max_vector_size) : static_wisdom.max_vector_size;
|
||||||
|
|
||||||
|
bool fair_workgroup_size = workgroup_size <= static_wisdom.max_workgroup_size &&
|
||||||
|
workgroup_size >= min_workgroup_size;
|
||||||
|
if (pass.pass.Ny == 1 && workgroup_size_y > 1)
|
||||||
|
{
|
||||||
|
fair_workgroup_size = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!fair_workgroup_size)
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// If we have dual mode, accept vector sizes larger than max.
|
||||||
|
bool fair_vector_size = test_resolve || (vector_size <= max_vector_size &&
|
||||||
|
vector_size >= min_vector_size);
|
||||||
|
|
||||||
|
if (!fair_vector_size)
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
FFTOptions::Performance perf;
|
||||||
|
perf.shared_banked = shared_banked;
|
||||||
|
perf.vector_size = vector_size;
|
||||||
|
perf.workgroup_size_x = workgroup_size_x;
|
||||||
|
perf.workgroup_size_y = workgroup_size_y;
|
||||||
|
|
||||||
|
try
|
||||||
|
{
|
||||||
|
// If workgroup sizes are too big for our test, this will throw.
|
||||||
|
double cost = bench(context, output.get(), input.get(), pass, { perf, type }, cache);
|
||||||
|
bench_count++;
|
||||||
|
|
||||||
|
#if 1
|
||||||
|
context->log("\nWisdom run (mode = %u, radix = %u):\n", pass.pass.mode, pass.pass.radix);
|
||||||
|
context->log(" Width: %4u\n", pass.pass.Nx);
|
||||||
|
context->log(" Height: %4u\n", pass.pass.Ny);
|
||||||
|
context->log(" Shared banked: %3s\n", shared_banked ? "yes" : "no");
|
||||||
|
context->log(" Vector size: %u\n", vector_size);
|
||||||
|
context->log(" Workgroup size: (%u, %u)\n", workgroup_size_x, workgroup_size_y);
|
||||||
|
context->log(" Cost: %8.3g\n", cost);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if (cost < minimum_cost)
|
||||||
|
{
|
||||||
|
#if 1
|
||||||
|
context->log(" New optimal solution! (%g -> %g)\n", minimum_cost, cost);
|
||||||
|
#endif
|
||||||
|
best_perf = perf;
|
||||||
|
minimum_cost = cost;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#ifdef GLFFT_CLI_ASYNC
|
||||||
|
catch (const AsyncCancellation &)
|
||||||
|
{
|
||||||
|
throw;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
catch (...)
|
||||||
|
{
|
||||||
|
// If we pass in bogus parameters,
|
||||||
|
// FFT will throw and we just ignore this.
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
context->log("Tested %u variants!\n", bench_count);
|
||||||
|
return make_pair(minimum_cost, best_perf);
|
||||||
|
}
|
||||||
|
|
||||||
|
const pair<const WisdomPass, FFTOptions::Performance>* FFTWisdom::find_optimal_options(unsigned Nx, unsigned Ny, unsigned radix,
|
||||||
|
Mode mode, Target input_target, Target output_target, const FFTOptions::Type &type) const
|
||||||
|
{
|
||||||
|
WisdomPass pass = {
|
||||||
|
{
|
||||||
|
Nx, Ny, radix, mode, input_target, output_target,
|
||||||
|
type,
|
||||||
|
},
|
||||||
|
0.0,
|
||||||
|
};
|
||||||
|
|
||||||
|
auto itr = library.find(pass);
|
||||||
|
return itr != end(library) ? (&(*itr)) : nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
const FFTOptions::Performance& FFTWisdom::find_optimal_options_or_default(unsigned Nx, unsigned Ny, unsigned radix,
|
||||||
|
Mode mode, Target input_target, Target output_target, const FFTOptions &base_options) const
|
||||||
|
{
|
||||||
|
WisdomPass pass = {
|
||||||
|
{
|
||||||
|
Nx, Ny, radix, mode, input_target, output_target,
|
||||||
|
base_options.type,
|
||||||
|
},
|
||||||
|
0.0,
|
||||||
|
};
|
||||||
|
|
||||||
|
auto itr = library.find(pass);
|
||||||
|
|
||||||
|
#if 0
|
||||||
|
if (itr == end(library))
|
||||||
|
{
|
||||||
|
context->log("Didn't find options for (%u x %u, radix %u, mode %u, input_target %u, output_target %u)\n",
|
||||||
|
Nx, Ny, radix, unsigned(mode), unsigned(input_target), unsigned(output_target));
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return itr != end(library) ? itr->second : base_options.performance;
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef GLFFT_SERIALIZATION
|
||||||
|
std::string FFTWisdom::archive() const
|
||||||
|
{
|
||||||
|
StringBuffer s;
|
||||||
|
PrettyWriter<StringBuffer> writer{s};
|
||||||
|
|
||||||
|
writer.StartObject();
|
||||||
|
writer.String("library");
|
||||||
|
|
||||||
|
// Serialize all wisdom accumulated to a string.
|
||||||
|
writer.StartArray();
|
||||||
|
for (auto &entry : library)
|
||||||
|
{
|
||||||
|
writer.StartObject();
|
||||||
|
|
||||||
|
writer.String("scenario");
|
||||||
|
writer.StartObject();
|
||||||
|
writer.String("nx");
|
||||||
|
writer.Uint(entry.first.pass.Nx);
|
||||||
|
writer.String("ny");
|
||||||
|
writer.Uint(entry.first.pass.Ny);
|
||||||
|
writer.String("radix");
|
||||||
|
writer.Uint(entry.first.pass.radix);
|
||||||
|
writer.String("mode");
|
||||||
|
writer.Uint(entry.first.pass.mode);
|
||||||
|
writer.String("input_target");
|
||||||
|
writer.Uint(entry.first.pass.input_target);
|
||||||
|
writer.String("output_target");
|
||||||
|
writer.Uint(entry.first.pass.output_target);
|
||||||
|
writer.EndObject();
|
||||||
|
|
||||||
|
writer.String("type");
|
||||||
|
writer.StartObject();
|
||||||
|
writer.String("fp16");
|
||||||
|
writer.Bool(entry.first.pass.type.fp16);
|
||||||
|
writer.String("input_fp16");
|
||||||
|
writer.Bool(entry.first.pass.type.input_fp16);
|
||||||
|
writer.String("output_fp16");
|
||||||
|
writer.Bool(entry.first.pass.type.output_fp16);
|
||||||
|
writer.String("normalize");
|
||||||
|
writer.Bool(entry.first.pass.type.normalize);
|
||||||
|
writer.EndObject();
|
||||||
|
|
||||||
|
writer.String("performance");
|
||||||
|
writer.StartObject();
|
||||||
|
writer.String("shared_banked");
|
||||||
|
writer.Bool(entry.second.shared_banked);
|
||||||
|
writer.String("vector_size");
|
||||||
|
writer.Uint(entry.second.vector_size);
|
||||||
|
writer.String("workgroup_size_x");
|
||||||
|
writer.Uint(entry.second.workgroup_size_x);
|
||||||
|
writer.String("workgroup_size_y");
|
||||||
|
writer.Uint(entry.second.workgroup_size_y);
|
||||||
|
writer.EndObject();
|
||||||
|
|
||||||
|
writer.String("cost");
|
||||||
|
writer.Double(entry.first.cost);
|
||||||
|
|
||||||
|
writer.EndObject();
|
||||||
|
}
|
||||||
|
writer.EndArray();
|
||||||
|
writer.EndObject();
|
||||||
|
return s.GetString();
|
||||||
|
}
|
||||||
|
|
||||||
|
void FFTWisdom::extract(const char *json)
|
||||||
|
{
|
||||||
|
Document document;
|
||||||
|
document.Parse(json);
|
||||||
|
|
||||||
|
// Exception safe, we don't want to risk throwing in the middle of the
|
||||||
|
// loop, leaving the library is broken state.
|
||||||
|
unordered_map<WisdomPass, FFTOptions::Performance> new_library;
|
||||||
|
|
||||||
|
auto &lib = document["library"];
|
||||||
|
|
||||||
|
// y u no begin(), end() :(
|
||||||
|
for (Value::ConstValueIterator itr = lib.Begin(); itr != lib.End(); ++itr)
|
||||||
|
{
|
||||||
|
auto &v = *itr;
|
||||||
|
|
||||||
|
WisdomPass pass;
|
||||||
|
FFTOptions::Performance perf;
|
||||||
|
|
||||||
|
pass.cost = v["cost"].GetDouble();
|
||||||
|
|
||||||
|
auto &scenario = v["scenario"];
|
||||||
|
pass.pass.Nx = scenario["nx"].GetUint();
|
||||||
|
pass.pass.Ny = scenario["ny"].GetUint();
|
||||||
|
pass.pass.radix = scenario["radix"].GetUint();
|
||||||
|
pass.pass.mode = static_cast<Mode>(scenario["mode"].GetUint());
|
||||||
|
pass.pass.input_target = static_cast<Target>(scenario["input_target"].GetUint());
|
||||||
|
pass.pass.output_target = static_cast<Target>(scenario["output_target"].GetUint());
|
||||||
|
|
||||||
|
auto &type = v["type"];
|
||||||
|
pass.pass.type.fp16 = type["fp16"].GetBool();
|
||||||
|
pass.pass.type.input_fp16 = type["input_fp16"].GetBool();
|
||||||
|
pass.pass.type.output_fp16 = type["output_fp16"].GetBool();
|
||||||
|
pass.pass.type.normalize = type["normalize"].GetBool();
|
||||||
|
|
||||||
|
auto &performance = v["performance"];
|
||||||
|
perf.shared_banked = performance["shared_banked"].GetBool();
|
||||||
|
perf.vector_size = performance["vector_size"].GetUint();
|
||||||
|
perf.workgroup_size_x = performance["workgroup_size_x"].GetUint();
|
||||||
|
perf.workgroup_size_y = performance["workgroup_size_y"].GetUint();
|
||||||
|
|
||||||
|
new_library[pass] = perf;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Exception safe.
|
||||||
|
swap(library, new_library);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
149
glfft/glfft_wisdom.hpp
Normal file
149
glfft/glfft_wisdom.hpp
Normal file
@@ -0,0 +1,149 @@
|
|||||||
|
/* Copyright (C) 2015 Hans-Kristian Arntzen <maister@archlinux.us>
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge,
|
||||||
|
* to any person obtaining a copy of this software and associated documentation files (the "Software"),
|
||||||
|
* to deal in the Software without restriction, including without limitation the rights to
|
||||||
|
* use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
|
||||||
|
* and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
|
||||||
|
* INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||||
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef GLFFT_WISDOM_HPP__
|
||||||
|
#define GLFFT_WISDOM_HPP__
|
||||||
|
|
||||||
|
#include <unordered_map>
|
||||||
|
#include <utility>
|
||||||
|
#include <string>
|
||||||
|
#include "glfft_common.hpp"
|
||||||
|
#include "glfft_interface.hpp"
|
||||||
|
|
||||||
|
namespace GLFFT
|
||||||
|
{
|
||||||
|
|
||||||
|
struct WisdomPass
|
||||||
|
{
|
||||||
|
struct
|
||||||
|
{
|
||||||
|
unsigned Nx;
|
||||||
|
unsigned Ny;
|
||||||
|
unsigned radix;
|
||||||
|
Mode mode;
|
||||||
|
Target input_target;
|
||||||
|
Target output_target;
|
||||||
|
FFTOptions::Type type;
|
||||||
|
} pass;
|
||||||
|
|
||||||
|
double cost;
|
||||||
|
|
||||||
|
bool operator==(const WisdomPass &other) const
|
||||||
|
{
|
||||||
|
return std::memcmp(&pass, &other.pass, sizeof(pass)) == 0;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
namespace std
|
||||||
|
{
|
||||||
|
template<>
|
||||||
|
struct hash<GLFFT::WisdomPass>
|
||||||
|
{
|
||||||
|
std::size_t operator()(const GLFFT::WisdomPass ¶ms) const
|
||||||
|
{
|
||||||
|
std::size_t h = 0;
|
||||||
|
hash<uint8_t> hasher;
|
||||||
|
for (std::size_t i = 0; i < sizeof(params.pass); i++)
|
||||||
|
{
|
||||||
|
h ^= hasher(reinterpret_cast<const uint8_t*>(¶ms.pass)[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
return h;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
namespace GLFFT
|
||||||
|
{
|
||||||
|
|
||||||
|
// Adds information which depends on the GPU vendor.
|
||||||
|
// This can speed up learning process, since there will be fewer "obviously wrong" settings to test.
|
||||||
|
struct FFTStaticWisdom
|
||||||
|
{
|
||||||
|
enum Tristate { True = 1, False = 0, DontCare = -1 };
|
||||||
|
|
||||||
|
unsigned min_workgroup_size = 1;
|
||||||
|
unsigned min_workgroup_size_shared = 1;
|
||||||
|
unsigned max_workgroup_size = 128; // GLES 3.1 mandates support for this.
|
||||||
|
unsigned min_vector_size = 2;
|
||||||
|
unsigned max_vector_size = 4;
|
||||||
|
Tristate shared_banked = DontCare;
|
||||||
|
};
|
||||||
|
|
||||||
|
class FFTWisdom
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
std::pair<double, FFTOptions::Performance> learn_optimal_options(Context *ctx,
|
||||||
|
unsigned Nx, unsigned Ny, unsigned radix,
|
||||||
|
Mode mode, Target input_target, Target output_target, const FFTOptions::Type &type);
|
||||||
|
|
||||||
|
void learn_optimal_options_exhaustive(Context *ctx,
|
||||||
|
unsigned Nx, unsigned Ny,
|
||||||
|
Type type, Target input_target, Target output_target, const FFTOptions::Type &fft_type);
|
||||||
|
|
||||||
|
const std::pair<const WisdomPass, FFTOptions::Performance>* find_optimal_options(unsigned Nx, unsigned Ny, unsigned radix,
|
||||||
|
Mode mode, Target input_target, Target output_target, const FFTOptions::Type &base_options) const;
|
||||||
|
|
||||||
|
const FFTOptions::Performance& find_optimal_options_or_default(unsigned Nx, unsigned Ny, unsigned radix,
|
||||||
|
Mode mode, Target input_target, Target output_target, const FFTOptions &base_options) const;
|
||||||
|
|
||||||
|
void set_static_wisdom(FFTStaticWisdom static_wisdom) { this->static_wisdom = static_wisdom; }
|
||||||
|
static FFTStaticWisdom get_static_wisdom_from_renderer(Context *context);
|
||||||
|
|
||||||
|
void set_bench_params(unsigned warmup,
|
||||||
|
unsigned iterations, unsigned dispatches, double timeout)
|
||||||
|
{
|
||||||
|
params.warmup = warmup;
|
||||||
|
params.iterations = iterations;
|
||||||
|
params.dispatches = dispatches;
|
||||||
|
params.timeout = timeout;
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef GLFFT_SERIALIZATION
|
||||||
|
// Serialization interface.
|
||||||
|
std::string archive() const;
|
||||||
|
void extract(const char *json);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::unordered_map<WisdomPass, FFTOptions::Performance> library;
|
||||||
|
|
||||||
|
std::pair<double, FFTOptions::Performance> study(Context *context,
|
||||||
|
const WisdomPass &pass, FFTOptions::Type options) const;
|
||||||
|
|
||||||
|
double bench(Context *cmd, Resource *output, Resource *input,
|
||||||
|
const WisdomPass &pass, const FFTOptions &options,
|
||||||
|
const std::shared_ptr<ProgramCache> &cache) const;
|
||||||
|
|
||||||
|
FFTStaticWisdom static_wisdom;
|
||||||
|
|
||||||
|
struct
|
||||||
|
{
|
||||||
|
unsigned warmup = 2;
|
||||||
|
unsigned iterations = 20;
|
||||||
|
unsigned dispatches = 50;
|
||||||
|
double timeout = 1.0;
|
||||||
|
} params;
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
Reference in New Issue
Block a user