1 // ----------------------------------------------------------------------------
3 // Copyright (C) 2006-2018 Fons Adriaensen <fons@linuxaudio.org>
5 // This program is free software; you can redistribute it and/or modify
6 // it under the terms of the GNU General Public License as published by
7 // the Free Software Foundation; either version 3 of the License, or
8 // (at your option) any later version.
10 // This program is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 // GNU General Public License for more details.
15 // You should have received a copy of the GNU General Public License
16 // along with this program. If not, see <http://www.gnu.org/licenses/>.
18 // ----------------------------------------------------------------------------
26 #include <windows.h> // Needed for MSVC 'Sleep()'
29 #include "zita-convolver/zita-convolver.h"
31 using namespace ArdourZita;
// Relative cost weights for multiply-accumulate and FFT work.
// Convproc::configure() combines them into its 'cmac' and 'cfft' estimates
// and compares those to choose its 'step' value.
33 float Convproc::_mac_cost = 1.0f;
34 float Convproc::_fft_cost = 5.0f;
// Allocate k floats with fftwf_alloc_real() and clear them to zero.
// Throws Converror (Converror::MEM_ALLOC).
// NOTE(review): the condition guarding the throw is not visible here;
// presumably a null return from the allocator. Confirm.
37 calloc_real (uint32_t k)
39 float* p = fftwf_alloc_real (k);
41 throw (Converror (Converror::MEM_ALLOC));
42 memset (p, 0, k * sizeof (float));
// Allocate k fftwf_complex values with fftwf_alloc_complex() and clear them
// to zero. Throws Converror (Converror::MEM_ALLOC).
// NOTE(review): the condition guarding the throw is not visible here;
// presumably a null return from the allocator. Confirm.
47 calloc_complex (uint32_t k)
49 fftwf_complex* p = fftwf_alloc_complex (k);
51 throw (Converror (Converror::MEM_ALLOC));
52 memset (p, 0, k * sizeof (fftwf_complex));
// Constructor: zero-fills the fixed-size pointer tables for the input
// buffers (MAXINP entries), the output buffers (MAXOUT entries) and the
// convolution levels (MAXLEV entries).
56 Convproc::Convproc (void)
68 memset (_inpbuff, 0, MAXINP * sizeof (float*));
69 memset (_outbuff, 0, MAXOUT * sizeof (float*));
70 memset (_convlev, 0, MAXLEV * sizeof (Convlevel*));
// Destructor.
// NOTE(review): the body is not visible here; document the teardown order
// once confirmed.
73 Convproc::~Convproc (void)
// Set the option flags from 'options'.
// NOTE(review): body not visible; presumably stores the value in _options,
// which configure() forwards to every Convlevel. Confirm.
80 Convproc::set_options (uint32_t options)
// Set the skip count from 'skipcnt'.
// The guarded statement runs only when _quantum equals both _minpart and
// _maxpart.
// NOTE(review): the guarded statement is not visible; presumably it assigns
// _skipcnt. Confirm.
86 Convproc::set_skipcnt (uint32_t skipcnt)
88 if ((_quantum == _minpart) && (_quantum == _maxpart))
// Validate the requested configuration, create and configure the Convlevel
// objects, and allocate the input and output buffers.
//
// Returns Converror::BAD_STATE unless _state is ST_IDLE, and
// Converror::BAD_PARAM when any of these checks fails:
//   - ninp in 1..MAXINP and nout in 1..MAXOUT
//   - quantum, minpart and maxpart each pass the x & (x - 1) == 0 bit test
//   - MINQUANT <= quantum <= MAXQUANT
//   - minpart >= MINPART, quantum <= minpart <= MAXDIVIS * quantum
//   - minpart <= maxpart <= MAXPART
// Converror::MEM_ALLOC is returned near the end.
// NOTE(review): the handler around the MEM_ALLOC return is not visible;
// presumably a catch for the Converror thrown by the allocation helpers.
93 Convproc::configure (uint32_t ninp,
101 uint32_t offs, npar, size, pind, nmin, i;
102 int prio, step, d, r, s;
105 if (_state != ST_IDLE)
106 return Converror::BAD_STATE;
107 if ( (ninp < 1) || (ninp > MAXINP)
108 || (nout < 1) || (nout > MAXOUT)
109 || (quantum & (quantum - 1))
110 || (quantum < MINQUANT)
111 || (quantum > MAXQUANT)
112 || (minpart & (minpart - 1))
113 || (minpart < MINPART)
114 || (minpart < quantum)
115 || (minpart > MAXDIVIS * quantum)
116 || (maxpart & (maxpart - 1))
117 || (maxpart > MAXPART)
118 || (maxpart < minpart))
119 return Converror::BAD_PARAM;
// nmin is min (ninp, nout) at this point; density may be replaced by 1 / nmin.
// NOTE(review): the condition for that replacement is not visible here.
121 nmin = (ninp < nout) ? ninp : nout;
123 density = 1.0f / nmin;
// Estimated FFT cost versus multiply-accumulate cost, using the static
// weights _fft_cost and _mac_cost. step is 1 when cfft < 4 * cmac, else 2.
126 cfft = _fft_cost * (ninp + nout);
127 cmac = _mac_cost * ninp * nout * density;
128 step = (cfft < 4 * cmac) ? 1 : 2;
// r is the ratio maxpart / minpart; s is 1 when r has any bit set under the
// mask 0xAAAA (odd-numbered bit positions), else 2.
130 r = maxpart / minpart;
131 s = (r & 0xAAAA) ? 1 : 2;
// From here on nmin is reused as a partition-count threshold (2 or 6).
134 nmin = (s == 1) ? 2 : 6;
135 if (minpart == quantum)
139 while (size < minpart) {
// Each pass creates and configures _convlev[pind] while offs < maxsize.
// npar is the number of partitions of 'size' needed to cover the remaining
// maxsize - offs, rounded up.
145 for (offs = pind = 0; offs < maxsize; pind++) {
146 npar = (maxsize - offs + size - 1) / size;
147 if ((size < maxpart) && (npar > nmin)) {
150 d = d - (d + r - 1) / r;
154 _convlev[pind] = new Convlevel ();
155 _convlev[pind]->configure (prio, offs, npar, size, _options);
157 if (offs < maxsize) {
161 nmin = (s == 1) ? 2 : 6;
// One buffer of _inpsize floats per input and one buffer of _minpart floats
// per output.
174 for (i = 0; i < ninp; i++)
175 _inpbuff[i] = new float[_inpsize];
176 for (i = 0; i < nout; i++)
177 _outbuff[i] = new float[_minpart];
180 return Converror::MEM_ALLOC;
// Write impulse-response data for the (inp, out) pair into every level.
// The final argument to Convlevel::impdata_write is true, which lets that
// call create the node and allocate partition storage as needed.
//
// Returns Converror::BAD_STATE unless _state is ST_STOP, and
// Converror::BAD_PARAM when inp >= _ninp or out >= _nout.
// NOTE(review): the path leading to the MEM_ALLOC return is not visible;
// presumably a catch around the per-level loop.
188 Convproc::impdata_create (uint32_t inp,
197 if (_state != ST_STOP)
198 return Converror::BAD_STATE;
199 if ((inp >= _ninp) || (out >= _nout))
200 return Converror::BAD_PARAM;
202 for (j = 0; j < _nlevels; j++) {
203 _convlev[j]->impdata_write (inp, out, step, data, ind0, ind1, true);
207 return Converror::MEM_ALLOC;
// Forward to Convlevel::impdata_clear (inp, out) on every level.
// Returns Converror::BAD_STATE when _state < ST_STOP.
// Unlike impdata_update(), no range check on inp/out is visible here.
213 Convproc::impdata_clear (uint32_t inp, uint32_t out)
217 if (_state < ST_STOP)
218 return Converror::BAD_STATE;
219 for (k = 0; k < _nlevels; k++)
220 _convlev[k]->impdata_clear (inp, out);
// Write impulse-response data for the (inp, out) pair into every level.
// The final argument to Convlevel::impdata_write is false, so only storage
// that already exists is written.
//
// Returns Converror::BAD_STATE when _state < ST_STOP, and
// Converror::BAD_PARAM when inp >= _ninp or out >= _nout.
225 Convproc::impdata_update (uint32_t inp,
234 if (_state < ST_STOP)
235 return Converror::BAD_STATE;
236 if ((inp >= _ninp) || (out >= _nout))
237 return Converror::BAD_PARAM;
238 for (j = 0; j < _nlevels; j++) {
239 _convlev[j]->impdata_write (inp, out, step, data, ind0, ind1, false);
// Forward to Convlevel::impdata_link (inp1, out1, inp2, out2) on every level.
//
// Returns Converror::BAD_PARAM when either pair is out of range or when the
// two pairs are identical, and Converror::BAD_STATE unless _state is ST_STOP.
// The parameter checks run before the state check.
// NOTE(review): the path leading to the MEM_ALLOC return is not visible;
// presumably a catch around the per-level loop.
245 Convproc::impdata_link (uint32_t inp1,
252 if ((inp1 >= _ninp) || (out1 >= _nout))
253 return Converror::BAD_PARAM;
254 if ((inp2 >= _ninp) || (out2 >= _nout))
255 return Converror::BAD_PARAM;
256 if ((inp1 == inp2) && (out1 == out2))
257 return Converror::BAD_PARAM;
258 if (_state != ST_STOP)
259 return Converror::BAD_STATE;
261 for (j = 0; j < _nlevels; j++) {
262 _convlev[j]->impdata_link (inp1, out1, inp2, out2);
266 return Converror::MEM_ALLOC;
// Zero every input buffer (_inpsize floats) and output buffer (_minpart
// floats), then call reset() on every level, passing the buffer sizes and
// the buffer tables.
// Returns Converror::BAD_STATE when _state is ST_IDLE.
272 Convproc::reset (void)
276 if (_state == ST_IDLE)
277 return Converror::BAD_STATE;
278 for (k = 0; k < _ninp; k++)
279 memset (_inpbuff[k], 0, _inpsize * sizeof (float));
280 for (k = 0; k < _nout; k++)
281 memset (_outbuff[k], 0, _minpart * sizeof (float));
282 for (k = 0; k < _nlevels; k++)
283 _convlev[k]->reset (_inpsize, _minpart, _inpbuff, _outbuff);
// Start the levels with the given priority and scheduling policy.
// Returns Converror::BAD_STATE unless _state is ST_STOP.
288 Convproc::start_process (int abspri, int policy)
292 if (_state != ST_STOP)
293 return Converror::BAD_STATE;
// When _minpart == _quantum the loop begins at level 1, so level 0 is not
// started through Convlevel::start().
// NOTE(review): presumably level 0 is then run directly from readout().
299 for (k = (_minpart == _quantum) ? 1 : 0; k < _nlevels; k++) {
300 _convlev[k]->start (abspri, policy);
// Advance the processor by one quantum.
// _inpoffs and _outoffs both step by _quantum. Each time _outoffs reaches
// _minpart, the output buffers are zeroed and every level's readout() result
// is OR-ed into f.
// NOTE(review): the action taken when _state != ST_PROC, the handling of
// _inpoffs == _inpsize, and the return statement are not visible here.
307 Convproc::process (bool sync)
312 if (_state != ST_PROC)
314 _inpoffs += _quantum;
315 if (_inpoffs == _inpsize)
317 _outoffs += _quantum;
318 if (_outoffs == _minpart) {
320 for (k = 0; k < _nout; k++)
321 memset (_outbuff[k], 0, _minpart * sizeof (float));
322 for (k = 0; k < _nlevels; k++)
323 f |= _convlev[k]->readout (sync, _skipcnt);
324 if (_skipcnt < _minpart)
327 _skipcnt -= _minpart;
// _latecnt is pre-incremented and compared with 5. The inner test is true
// when the OPT_LATE_CONTIN bit is NOT set in _options.
329 if (++_latecnt >= 5) {
330 if (~_options & OPT_LATE_CONTIN)
// Call stop() on every level.
// Returns Converror::BAD_STATE unless _state is ST_PROC.
341 Convproc::stop_process (void)
345 if (_state != ST_PROC)
346 return Converror::BAD_STATE;
347 for (k = 0; k < _nlevels; k++)
348 _convlev[k]->stop ();
// Loop until check_stop() returns true, then release the input buffers and
// the output buffers with delete[] and walk over the levels.
// NOTE(review): the body of the wait loop and the per-level statement are
// not visible here.
354 Convproc::cleanup (void)
358 while (!check_stop ()) {
365 for (k = 0; k < _ninp; k++) {
366 delete[] _inpbuff[k];
369 for (k = 0; k < _nout; k++) {
370 delete[] _outbuff[k];
373 for (k = 0; k < _nlevels; k++) {
// Scan the levels in order while each one has _stat == Convlevel::ST_IDLE.
// k reaches _nlevels only if every level is idle.
// NOTE(review): the statements after the scan, including the return, are
// not visible here.
392 Convproc::check_stop (void)
396 for (k = 0; (k < _nlevels) && (_convlev[k]->_stat == Convlevel::ST_IDLE); k++)
// Print a description of every level to F by calling Convlevel::print().
406 Convproc::print (FILE* F)
410 for (k = 0; k < _nlevels; k++)
411 _convlev[k]->print (F);
// FV4 is a compiler vector-extension type of 16 bytes of float data (four
// floats). Convlevel::process() uses it for the multiply-accumulate loop
// when OPT_VECTOR_MODE is set. Only defined with ENABLE_VECTOR_MODE.
414 #ifdef ENABLE_VECTOR_MODE
415 typedef float FV4 __attribute__ ((vector_size (16)));
// Constructor. Part of it is compiled only when PTW32_VERSION is not defined.
// NOTE(review): member initialisation and the guarded section are not
// visible here.
418 Convlevel::Convlevel (void)
423 #ifndef PTW32_VERSION
// Destructor.
// NOTE(review): the body is not visible here.
436 Convlevel::~Convlevel (void)
// Set up the FFT work buffers and plans for this level.
// Allocates two zeroed real buffers of 2 * _parsize floats (_time_data,
// _prep_data) and one zeroed complex buffer of _parsize + 1 bins
// (_freq_data). Then creates a real-to-complex and a complex-to-real FFTW
// plan of length 2 * _parsize over _time_data and _freq_data.
// Throws Converror (Converror::MEM_ALLOC).
// NOTE(review): the statement taken when both plans are non-null is not
// visible; presumably an early return, making the throw the failure path.
442 Convlevel::configure (int prio,
// Planner flag: FFTW_MEASURE when OPT_FFTW_MEASURE is set in 'options',
// otherwise FFTW_ESTIMATE.
448 int fftwopt = (options & OPT_FFTW_MEASURE) ? FFTW_MEASURE : FFTW_ESTIMATE;
456 _time_data = calloc_real (2 * _parsize);
457 _prep_data = calloc_real (2 * _parsize);
458 _freq_data = calloc_complex (_parsize + 1);
459 _plan_r2c = fftwf_plan_dft_r2c_1d (2 * _parsize, _time_data, _freq_data, fftwopt);
460 _plan_c2r = fftwf_plan_dft_c2r_1d (2 * _parsize, _freq_data, _time_data, fftwopt);
461 if (_plan_r2c && _plan_c2r)
463 throw (Converror (Converror::MEM_ALLOC));
// Transform the part of the impulse-response data that overlaps this level
// and add it into the frequency-domain partitions of the (inp, out) node.
//
// With 'create' true the node is looked up or created through
// findmacnode (..., true) and its partition table is allocated.
// With 'create' false an existing node with an allocated table is required.
// A node with _link set is rejected in both cases.
// Samples are read from 'data' with stride 'step'.
// NOTE(review): the statements following the rejection tests are not
// visible here; presumably early returns.
467 Convlevel::impdata_write (uint32_t inp,
476 int32_t j, j0, j1, n;
// [i0, i1) is the index range covered by this level's _npar partitions.
// The test below checks whether it misses the data range [0, n) entirely.
483 i1 = i0 + _npar * _parsize;
484 if ((i0 >= n) || (i1 <= 0))
488 M = findmacnode (inp, out, true);
489 if (M == 0 || M->_link)
492 M->alloc_fftb (_npar);
494 M = findmacnode (inp, out, false);
495 if (M == 0 || M->_link || M->_fftb == 0)
// Scale factor applied to every sample before the forward FFT.
499 norm = 0.5f / _parsize;
500 for (k = 0; k < _npar; k++) {
502 if ((i0 < n) && (i1 > 0)) {
// Allocate the partition buffer on first use when creating.
504 if (fftb == 0 && create) {
505 M->_fftb[k] = fftb = calloc_complex (_parsize + 1);
// Copy the overlapping samples [j0, j1) into the zeroed prep buffer at
// offset j - i0, then transform.
508 memset (_prep_data, 0, 2 * _parsize * sizeof (float));
509 j0 = (i0 < 0) ? 0 : i0;
510 j1 = (i1 > n) ? n : i1;
511 for (j = j0; j < j1; j++)
512 _prep_data[j - i0] = norm * data[j * step];
513 fftwf_execute_dft_r2c (_plan_r2c, _prep_data, _freq_data);
514 #ifdef ENABLE_VECTOR_MODE
515 if (_options & OPT_VECTOR_MODE)
516 fftswap (_freq_data);
// The result is accumulated (+=), so repeated writes to the same partition
// add up rather than overwrite.
518 for (j = 0; j <= (int)_parsize; j++) {
519 fftb[j][0] += _freq_data[j][0];
520 fftb[j][1] += _freq_data[j][1];
// Zero the frequency-domain partitions (_parsize + 1 bins each) of the
// existing (inp, out) node. The node is looked up without creating it.
// NOTE(review): the statement taken when the node is missing, is a link, or
// has no partition table is not visible; presumably an early return. A
// per-partition guard inside the loop may also be missing from view.
529 Convlevel::impdata_clear (uint32_t inp, uint32_t out)
534 M = findmacnode (inp, out, false);
535 if (M == 0 || M->_link || M->_fftb == 0)
537 for (i = 0; i < _npar; i++) {
539 memset (M->_fftb[i], 0, (_parsize + 1) * sizeof (fftwf_complex));
// Look up the existing node for (inp1, out1) without creating it, and look
// up or create the node for (inp2, out2).
// NOTE(review): the statements that establish the link between M1 and M2
// are not visible here.
545 Convlevel::impdata_link (uint32_t inp1,
553 M1 = findmacnode (inp1, out1, false);
556 M2 = findmacnode (inp2, out2, true);
// Clear this level's state: zero every input node's _npar spectra
// (_parsize + 1 bins each) and all three buffers (_parsize floats each) of
// every output node, then set the offsets and _bits.
// NOTE(review): the assignments of _inpsize, _outsize and the buffer tables
// from the arguments are not visible here.
562 Convlevel::reset (uint32_t inpsize,
575 for (X = _inp_list; X; X = X->_next) {
576 for (i = 0; i < _npar; i++) {
577 memset (X->_ffta[i], 0, (_parsize + 1) * sizeof (fftwf_complex));
580 for (Y = _out_list; Y; Y = Y->_next) {
581 for (i = 0; i < 3; i++) {
582 memset (Y->_buff[i], 0, _parsize * sizeof (float));
// NOTE(review): the branch bodies are only partly visible. Presumably the
// half-partition offsets below belong to the _parsize != _outsize case.
585 if (_parsize == _outsize) {
589 _outoffs = _parsize / 2;
590 _inpoffs = _inpsize - _outoffs;
592 _bits = _parsize / _outsize;
// Create the worker thread for this level; it runs static_main (this).
// The thread is created detached, with explicit (non-inherited) scheduling
// using 'policy' and priority 'abspri', system contention scope and a
// 0x10000-byte stack.
// NOTE(review): return values of the pthread_* calls, including
// pthread_create, are not checked here.
601 Convlevel::start (int abspri, int policy)
605 struct sched_param parm;
607 #ifndef PTW32_VERSION
// Priority limits for the chosen policy.
// NOTE(review): how abspri is adjusted against min/max is not visible here.
610 min = sched_get_priority_min (policy);
611 max = sched_get_priority_max (policy);
617 parm.sched_priority = abspri;
618 pthread_attr_init (&attr);
619 pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
620 pthread_attr_setschedpolicy (&attr, policy);
621 pthread_attr_setschedparam (&attr, &parm);
622 pthread_attr_setscope (&attr, PTHREAD_SCOPE_SYSTEM);
623 pthread_attr_setinheritsched (&attr, PTHREAD_EXPLICIT_SCHED);
624 pthread_attr_setstacksize (&attr, 0x10000);
625 pthread_create (&_pthr, &attr, static_main, this);
626 pthread_attr_destroy (&attr);
// Request this level to stop; acts only when _stat is not already ST_IDLE.
// NOTE(review): the statements inside the guard are not visible here.
630 Convlevel::stop (void)
632 if (_stat != ST_IDLE) {
// Release this level's FFT resources: destroy both FFTW plans and free the
// three work buffers with fftwf_free().
// NOTE(review): earlier statements of this function are not visible here.
639 Convlevel::cleanup (void)
667 fftwf_destroy_plan (_plan_r2c);
668 fftwf_destroy_plan (_plan_c2r);
669 fftwf_free (_time_data);
670 fftwf_free (_prep_data);
671 fftwf_free (_freq_data);
// Thread entry point passed to pthread_create() by start(): casts 'arg'
// back to the Convlevel it was created for and runs its main().
680 Convlevel::static_main (void* arg)
682 ((Convlevel*)arg)->main ();
// Body of the worker thread. It tests for _stat == ST_TERM, and part of
// that branch is compiled only when PTW32_VERSION is not defined.
// NOTE(review): the loop structure and the work done per iteration are not
// visible here.
687 Convlevel::main (void)
692 if (_stat == ST_TERM) {
694 #ifndef PTW32_VERSION
// Process one partition for this level.
//
// Input side: for every input node, copy the newest samples from the shared
// input buffer into the first half of _time_data (in two pieces: n1 floats
// from offset i1, then n2 floats from the buffer start), zero the second
// half, and forward-transform into that node's spectrum slot
// X->_ffta[_ptind].
//
// Output side: for every output node, clear _freq_data, accumulate the
// complex products of input spectra and impulse-response spectra over all
// partitions, inverse-transform, add the first half of the result to buffer
// opi1 and copy the second half into buffer opi2.
// NOTE(review): how 'skip' selects between the two output loops, and the
// updates of _ptind and _opind, are not visible here.
705 Convlevel::process (bool skip)
707 uint32_t i, i1, j, k, n1, n2, opi1, opi2;
// Wrap the input offset once it reaches the input buffer size.
720 if (_inpoffs >= _inpsize) {
721 _inpoffs -= _inpsize;
// The two output buffers after _opind, taken modulo 3.
726 opi1 = (_opind + 1) % 3;
727 opi2 = (_opind + 2) % 3;
729 for (X = _inp_list; X; X = X->_next) {
730 inpd = _inpbuff[X->_inp];
732 memcpy (_time_data, inpd + i1, n1 * sizeof (float));
734 memcpy (_time_data + n1, inpd, n2 * sizeof (float));
735 memset (_time_data + _parsize, 0, _parsize * sizeof (float));
736 fftwf_execute_dft_r2c (_plan_r2c, _time_data, X->_ffta[_ptind]);
737 #ifdef ENABLE_VECTOR_MODE
738 if (_options & OPT_VECTOR_MODE)
739 fftswap (X->_ffta[_ptind]);
// This loop only zeroes buffer opi2 of every output node.
// NOTE(review): presumably this is the 'skip' path.
744 for (Y = _out_list; Y; Y = Y->_next) {
745 outd = Y->_buff[opi2];
746 memset (outd, 0, _parsize * sizeof (float));
749 for (Y = _out_list; Y; Y = Y->_next) {
750 memset (_freq_data, 0, (_parsize + 1) * sizeof (fftwf_complex));
751 for (M = Y->_list; M; M = M->_next) {
754 for (j = 0; j < _npar; j++) {
// A linked node uses the partition data of the node it links to.
756 fftb = M->_link ? M->_link->_fftb[j] : M->_fftb[j];
758 #ifdef ENABLE_VECTOR_MODE
759 if (_options & OPT_VECTOR_MODE) {
// Vector path: complex multiply-accumulate in FV4 groups, k advancing by 4.
// NOTE(review): this relies on the data layout produced by fftswap(), which
// is not visible here.
762 FV4* D = (FV4*)_freq_data;
763 for (k = 0; k < _parsize; k += 4) {
764 D[0] += A[0] * B[0] - A[1] * B[1];
765 D[1] += A[0] * B[1] + A[1] * B[0];
// The last bin (index _parsize) is handled separately: only the real parts
// are multiplied, and the imaginary part is set to 0.
770 _freq_data[_parsize][0] += ffta[_parsize][0] * fftb[_parsize][0];
771 _freq_data[_parsize][1] = 0;
// Scalar path: complex multiply-accumulate over all _parsize + 1 bins.
775 for (k = 0; k <= _parsize; k++) {
776 _freq_data[k][0] += ffta[k][0] * fftb[k][0] - ffta[k][1] * fftb[k][1];
777 _freq_data[k][1] += ffta[k][0] * fftb[k][1] + ffta[k][1] * fftb[k][0];
787 #ifdef ENABLE_VECTOR_MODE
788 if (_options & OPT_VECTOR_MODE)
789 fftswap (_freq_data);
791 fftwf_execute_dft_c2r (_plan_c2r, _freq_data, _time_data);
// The first half of the result is added to buffer opi1; the second half
// overwrites buffer opi2.
792 outd = Y->_buff[opi1];
793 for (k = 0; k < _parsize; k++)
794 outd[k] += _time_data[k];
795 outd = Y->_buff[opi2];
796 memcpy (outd, _time_data + _parsize, _parsize * sizeof (float));
// Advance the read position by _outsize and deliver this level's output.
// When _outoffs reaches _parsize, the next partition is needed: the code
// branches on _stat == ST_PROC, and one path calls process() directly with
// skip = (skipcnt >= 2 * _parsize).
// Returns _bits when _wait > 1, otherwise 0.
// NOTE(review): the synchronisation statements around _done and the
// updates of _wait are only partly visible here.
806 Convlevel::readout (bool sync, uint32_t skipcnt)
812 _outoffs += _outsize;
813 if (_outoffs == _parsize) {
815 if (_stat == ST_PROC) {
819 else if (_done.trywait ())
828 process (skipcnt >= 2 * _parsize);
// Walk the output nodes: p points into the current buffer at _outoffs and
// q at the shared output buffer for that output.
// NOTE(review): the body of the inner loop is not visible here.
834 for (Y = _out_list; Y; Y = Y->_next) {
835 p = Y->_buff[_opind] + _outoffs;
836 q = _outbuff[Y->_out];
837 for (i = 0; i < _outsize; i++)
841 return (_wait > 1) ? _bits : 0;
// Print one line with this level's _prio, _offs, _parsize and _npar to F.
845 Convlevel::print (FILE* F)
847 fprintf (F, "prio = %4d, offs = %6d, parsize = %5d, npar = %3d\n", _prio, _offs, _parsize, _npar);
// Find the Macnode that connects input 'inp' to output 'out'.
// Searches _inp_list for the Inpnode, _out_list for the Outnode, then the
// Outnode's _list for the Macnode whose _inpn is that Inpnode.
// Callers test the result against 0.
// NOTE(review): the guards around the 'new' statements are not visible;
// presumably nodes are created only when 'create' is true.
851 Convlevel::findmacnode (uint32_t inp, uint32_t out, bool create)
857 for (X = _inp_list; X && (X->_inp != inp); X = X->_next)
// A new input node gets its spectrum buffers allocated for _npar partitions.
862 X = new Inpnode (inp);
863 X->_next = _inp_list;
865 X->alloc_ffta (_npar, _parsize);
868 for (Y = _out_list; Y && (Y->_out != out); Y = Y->_next)
873 Y = new Outnode (out, _parsize);
874 Y->_next = _out_list;
878 for (M = Y->_list; M && (M->_inpn != X); M = M->_next)
// Rearrange the complex buffer 'p' for vector mode; n is the partition size.
// Compiled only with ENABLE_VECTOR_MODE.
// NOTE(review): the rearrangement itself is not visible here. Callers apply
// it after a forward FFT and before an inverse FFT.
891 #ifdef ENABLE_VECTOR_MODE
894 Convlevel::fftswap (fftwf_complex* p)
896 uint32_t n = _parsize;
// Constructor for the node of input 'inp'.
// NOTE(review): member initialisation is not visible here.
913 Inpnode::Inpnode (uint16_t inp)
// Destructor.
// NOTE(review): body not visible; presumably releases the buffers through
// free_ffta(). Confirm.
921 Inpnode::~Inpnode (void)
// Allocate the table of _npar spectrum pointers and fill each entry with a
// zeroed complex buffer of size + 1 bins.
// NOTE(review): the assignment of _npar from 'npar' is not visible here.
927 Inpnode::alloc_ffta (uint16_t npar, int32_t size)
930 _ffta = new fftwf_complex*[_npar];
931 for (int i = 0; i < _npar; i++) {
932 _ffta[i] = calloc_complex (size + 1);
// Release every spectrum buffer in _ffta with fftwf_free().
// NOTE(review): the release of the pointer table itself is not visible here.
937 Inpnode::free_ffta (void)
941 for (uint16_t i = 0; i < _npar; i++) {
942 fftwf_free (_ffta[i]);
// Constructor for the node attached to input node 'inpn'.
// NOTE(review): member initialisation is not visible here.
949 Macnode::Macnode (Inpnode* inpn)
// Destructor.
// NOTE(review): body not visible; presumably releases the buffers through
// free_fftb(). Confirm.
958 Macnode::~Macnode (void)
// Allocate the table of _npar partition pointers.
// NOTE(review): the assignment of _npar and the loop body are not visible.
// Convlevel::impdata_write() allocates entries on demand when they are 0,
// which suggests the loop sets each entry to 0. Confirm.
964 Macnode::alloc_fftb (uint16_t npar)
967 _fftb = new fftwf_complex*[_npar];
968 for (uint16_t i = 0; i < _npar; i++) {
// Release every partition buffer in _fftb with fftwf_free().
// NOTE(review): the release of the pointer table itself is not visible here.
974 Macnode::free_fftb (void)
978 for (uint16_t i = 0; i < _npar; i++) {
979 fftwf_free (_fftb[i]);
// Constructor for the node of output 'out': allocates three zeroed buffers
// of 'size' floats each. calloc_real() throws Converror on failure.
986 Outnode::Outnode (uint16_t out, int32_t size)
991 _buff[0] = calloc_real (size);
992 _buff[1] = calloc_real (size);
993 _buff[2] = calloc_real (size);
// Destructor: releases the three output buffers with fftwf_free().
996 Outnode::~Outnode (void)
998 fftwf_free (_buff[0]);
999 fftwf_free (_buff[1]);
1000 fftwf_free (_buff[2]);