2 Copyright (C) 2015 Carl Hetherington <cth@carlh.net>
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 #include "poznan_encoder.h"
21 #include "exceptions.h"
23 #include "raw_convert.h"
24 #include <poznan/tier2/markers.h>
25 #include <dcp/openjpeg_image.h>
27 #include <thrust/system/cuda/error.h>
33 using boost::shared_ptr;
35 PoznanEncoder::PoznanEncoder ()
37 void* config = open_library ("config");
38 void* preprocessing = open_library ("preprocessing");
39 void* dwt = open_library ("dwt");
40 void* tier1 = open_library ("tier1");
41 void* gpu_coeff_coder = open_library ("gpu_coeff_coder");
42 void* tier2 = open_library ("tier2");
43 void* types = open_library ("types");
44 void* misc = open_library ("misc");
46 _init_device = (void (*)(type_parameters *)) dlsym (config, "init_device");
47 _color_coder_lossy = (void (*)(type_image *)) dlsym (preprocessing, "color_coder_lossy");
48 _fwt = (void (*)(type_tile *)) dlsym (dwt, "fwt");
49 _quantize_tile = (void (*)(type_tile *)) dlsym (tier1, "quantize_tile");
50 _encode_tile = (void (*)(type_tile *)) dlsym (gpu_coeff_coder, "encode_tile");
51 _set_coding_parameters = (void (*)(type_image *, type_parameters *)) dlsym (types, "set_coding_parameters");
52 _init_tiles = (void (*)(type_image **, type_parameters *)) dlsym (types, "init_tiles");
53 _init_buffer = (void (*)(type_buffer *)) dlsym (types, "init_buffer");
54 _encode_codestream = (void (*)(type_buffer *, type_image *)) dlsym (tier2, "encode_codestream");
55 _cuda_h_allocate_mem = (void (*)(void **, uint64_t)) dlsym (misc, "cuda_h_allocate_mem");
56 _cuda_memcpy_htd = (void (*)(void *, void *, uint64_t)) dlsym (misc, "cuda_memcpy_htd");
57 _cuda_h_free = (void (*)(void *)) dlsym (misc, "cuda_h_free");
60 !_init_device || !_color_coder_lossy || !_fwt || !_quantize_tile ||
61 !_encode_tile || !_set_coding_parameters || !_init_tiles ||
62 !_init_buffer || !_encode_codestream || !_cuda_h_allocate_mem ||
63 !_cuda_memcpy_htd || !_cuda_h_free) {
64 throw JPEG2000EncoderUnavailableException (name(), "missing symbol");
69 PoznanEncoder::open_library (string library_name)
71 /* XXX: need cross-platform implementation of dlopen etc. */
73 library_name = "libdcpomatic-" + library_name + ".so";
74 void* lib = dlopen (library_name.c_str(), RTLD_LAZY | RTLD_GLOBAL);
76 throw JPEG2000EncoderUnavailableException (name(), "could not find " + library_name + " (" + dlerror() + ")");
82 PoznanEncoder::parameters_changed ()
84 /* One tile which covers entire image */
85 _param.param_tile_w = -1;
86 _param.param_tile_h = -1;
88 /* Wavelet decomposition levels */
89 _param.param_tile_comp_dlvls = _resolution.get() == RESOLUTION_2K ? 5 : 6;
91 /* Power of 2 for maximum codeblock size */
92 _param.param_cblk_exp_w = 5;
93 _param.param_cblk_exp_h = 5;
95 /* DWT 9/7 transform */
96 _param.param_wavelet_type = 1;
99 _param.param_use_mct = 1;
101 /* Device to run on */
102 _param.param_device = 0;
104 /* Target file size */
105 _param.param_target_size = (_bandwidth.get() / _frame_rate.get()) / 8;
106 if (_threed.get ()) {
107 _param.param_target_size /= 2;
110 /* Bits per pixel per component */
111 _param.param_bp = 12;
113 /* Don't know about these: use the defaults */
114 _param.param_use_part2_mct = 0;
115 _param.param_mct_compression_method = 0;
116 _param.param_mct_klt_iterations = 10000;
117 _param.param_mct_klt_border_eigenvalue = 0.000001;
118 _param.param_mct_klt_err = 1.0e-7;
120 _init_device (&_param);
124 PoznanEncoder::name () const
126 return _("CUDA (GPU) encoder (Poznan Supercomputing and Networking Center)");
130 PoznanEncoder::do_encode (shared_ptr<const dcp::OpenJPEGImage> input)
132 type_image* img = new type_image;
134 img->width = input->size().width;
135 img->height = input->size().height;
137 img->num_components = 3;
138 img->num_range_bits = 12;
139 img->sign = UNSIGNED;
140 img->num_dlvls = _param.param_tile_comp_dlvls;
142 img->wavelet_type = _param.param_wavelet_type;
143 img->use_mct = _param.param_use_mct;
144 img->use_part2_mct = _param.param_use_part2_mct;
145 img->mct_compression_method = _param.param_mct_compression_method;
147 img->coding_style = CODING_STYLE_PRECINCTS_DEFINED;
148 img->prog_order = COMP_POS_RES_LY_PROG;
149 img->num_layers = NUM_LAYERS;
151 _set_coding_parameters (img, &_param);
152 _init_tiles (&img, &_param);
154 type_tile* tile = &(img->tile[0]);
156 /* XXX: it's a shame about this int -> float conversion */
157 for (int i = 0; i < 3; ++i) {
158 type_tile_comp* c = &tile->tile_comp[i];
160 std::cout << "Tile comp " << i << ": " << c->width << "x" << c->height << "\n";
161 int const pixels = c->width * c->height;
162 _cuda_h_allocate_mem ((void **) &c->img_data, pixels * sizeof (type_data));
163 for (int j = 0; j < pixels; ++j) {
164 c->img_data[j] = float (input->data(i)[j]);
165 //c->img_data[j] = input->data(i)[j];
167 _cuda_memcpy_htd (c->img_data, c->img_data_d, pixels * sizeof (type_data));
168 _cuda_h_free (c->img_data);
171 _color_coder_lossy (img);
173 std::cout << "Tile " << tile->width << "x" << tile->height << "\n";
175 _quantize_tile (tile);
179 _init_buffer (&buffer);
180 _encode_codestream (&buffer, img);
181 if (cudaGetLastError () != cudaSuccess) {
182 throw EncodeError ("CUDA error");
185 std::cout << img->num_tiles << " tiles.\n";
186 std::cout << "got " << buffer.bytes_count << " bytes.\n";
187 Data encoded (buffer.data, buffer.bytes_count);