Add loop for GPU multithreading; add some timing.

author Carl Hetherington <cth@carlh.net>

Mon, 24 Aug 2020 07:44:30 +0000 (09:44 +0200)

committer Carl Hetherington <cth@carlh.net>

Mon, 24 Aug 2020 07:44:30 +0000 (09:44 +0200)
author Carl Hetherington <cth@carlh.net>
Mon, 24 Aug 2020 07:44:30 +0000 (09:44 +0200)
committer Carl Hetherington <cth@carlh.net>
Mon, 24 Aug 2020 07:44:30 +0000 (09:44 +0200)
diff --git a/src/lib/gpu_j2k_encode_worker.cc b/src/lib/gpu_j2k_encode_worker.cc

index ba0b2036c4457c2883b7f0c9c8c8288b6b87afc5..eecd9962ddf43f8a7779fc9d96c13a1228075a5f 100644 (file)
--- a/src/lib/gpu_j2k_encode_worker.cc
+++ b/src/lib/gpu_j2k_encode_worker.cc
@@ -45,7 +45,11 @@ using boost::shared_ptr;
  
  GPUJ2KEncodeWorker::GPUJ2KEncodeWorker ()
  {
-       init_device (0);
+       static bool init_called = false;
+       if (!init_called) {
+               init_device (0);
+               init_called = true;
+       }
  }
  
  optional<Data>
@@ -80,31 +84,40 @@ GPUJ2KEncodeWorker::encode (shared_ptr<DCPVideo> vf)
         init_tiles (&img, width, height, 5, 5);
         type_tile* tile = &(img.tile[0]);
  
-       PeriodTimer encode("poz-encode");
+       StateTimer encode("");
  
         // XXX: it's a big shame about this int -> float conversion
         for (int i = 0; i < 3; ++i) {
+               encode.set("convert");
                 type_tile_comp* c = &tile->tile_comp[i];
                 c->tile_comp_no = i;
                 int const pixels = c->width * c->height;
                 for (int j = 0; j < pixels; ++j) {
                         c->img_data[j] = float (image->data(i)[j]);
                 }
+               encode.set("copy to gpu");
                 cuda_memcpy_htd (c->img_data, c->img_data_d, pixels * sizeof(type_data));
         }
  
+       encode.set("mct");
         mct (&img, 10000, 0.000001, 1.0e-7);
+       encode.set("fwt");
         fwt (tile);
+       encode.set("quantize");
         quantize_tile (tile);
+       encode.set("encode");
         encode_tile (tile);
  
+       encode.set("t2");
         type_buffer buffer;
         init_buffer (&buffer);
         encode_codestream (&buffer, &img);
+       encode.set("sync");
         cudaThreadSynchronize ();
  
         image_destroy(&img);
  
+       encode.set("out");
         // XXX: remove this memcpy
         dcp::Data encoded (buffer.bytes_count);
         memcpy (encoded.data().get(), buffer.data, buffer.bytes_count);
diff --git a/src/lib/j2k_encoder.cc b/src/lib/j2k_encoder.cc

index 1e5c206fc932a391b735d2922791f04d85f6e98a..ba512ae252318010ee55225128f0904f2b923d64 100644 (file)
--- a/src/lib/j2k_encoder.cc
+++ b/src/lib/j2k_encoder.cc
@@ -385,9 +385,11 @@ J2KEncoder::servers_list_changed ()
                 }
         }
  
-       shared_ptr<GPUJ2KEncodeWorker> w (new GPUJ2KEncodeWorker());
-       _workers.push_back (w);
-       _threads->create_thread(boost::bind(&J2KEncoder::encoder_thread, this, w));
+       for (int i = 0; i < 1; ++i) {
+               shared_ptr<GPUJ2KEncodeWorker> w (new GPUJ2KEncodeWorker());
+               _workers.push_back (w);
+               _threads->create_thread(boost::bind(&J2KEncoder::encoder_thread, this, w));
+       }
  
         _writer->set_encoder_threads (_threads->size());
  }
author	Carl Hetherington <cth@carlh.net>
	Mon, 24 Aug 2020 07:44:30 +0000 (09:44 +0200)
committer	Carl Hetherington <cth@carlh.net>
	Mon, 24 Aug 2020 07:44:30 +0000 (09:44 +0200)
src/lib/gpu_j2k_encode_worker.cc		patch \| blob \| history
src/lib/j2k_encoder.cc		patch \| blob \| history