1 // ----------------------------------------------------------------------------
3 // Copyright (C) 2006-2018 Fons Adriaensen <fons@linuxaudio.org>
5 // This program is free software; you can redistribute it and/or modify
6 // it under the terms of the GNU General Public License as published by
7 // the Free Software Foundation; either version 3 of the License, or
8 // (at your option) any later version.
10 // This program is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 // GNU General Public License for more details.
15 // You should have received a copy of the GNU General Public License
16 // along with this program. If not, see <http://www.gnu.org/licenses/>.
18 // ----------------------------------------------------------------------------
26 #include "zita-convolver/zita-convolver.h"
28 using namespace ArdourZita;
30 float Convproc::_mac_cost = 1.0f;
31 float Convproc::_fft_cost = 5.0f;
34 static float *calloc_real (uint32_t k)
36 float *p = fftwf_alloc_real (k);
37 if (!p) throw (Converror (Converror::MEM_ALLOC));
38 memset (p, 0, k * sizeof (float));
42 static fftwf_complex *calloc_complex (uint32_t k)
44 fftwf_complex *p = fftwf_alloc_complex (k);
45 if (!p) throw (Converror (Converror::MEM_ALLOC));
46 memset (p, 0, k * sizeof (fftwf_complex));
51 Convproc::Convproc (void) :
63 memset (_inpbuff, 0, MAXINP * sizeof (float *));
64 memset (_outbuff, 0, MAXOUT * sizeof (float *));
65 memset (_convlev, 0, MAXLEV * sizeof (Convlevel *));
69 Convproc::~Convproc (void)
76 void Convproc::set_options (uint32_t options)
82 void Convproc::set_skipcnt (uint32_t skipcnt)
84 if ((_quantum == _minpart) && (_quantum == _maxpart)) _skipcnt = skipcnt;
88 int Convproc::configure (uint32_t ninp,
96 uint32_t offs, npar, size, pind, nmin, i;
97 int prio, step, d, r, s;
100 if (_state != ST_IDLE) return Converror::BAD_STATE;
101 if ( (ninp < 1) || (ninp > MAXINP)
102 || (nout < 1) || (nout > MAXOUT)
103 || (quantum & (quantum - 1))
104 || (quantum < MINQUANT)
105 || (quantum > MAXQUANT)
106 || (minpart & (minpart - 1))
107 || (minpart < MINPART)
108 || (minpart < quantum)
109 || (minpart > MAXDIVIS * quantum)
110 || (maxpart & (maxpart - 1))
111 || (maxpart > MAXPART)
112 || (maxpart < minpart)) return Converror::BAD_PARAM;
114 nmin = (ninp < nout) ? ninp : nout;
115 if (density <= 0.0f) density = 1.0f / nmin;
116 if (density > 1.0f) density = 1.0f;
117 cfft = _fft_cost * (ninp + nout);
118 cmac = _mac_cost * ninp * nout * density;
119 step = (cfft < 4 * cmac) ? 1 : 2;
122 r = maxpart / minpart;
123 s = (r & 0xAAAA) ? 1 : 2;
126 nmin = (s == 1) ? 2 : 6;
127 if (minpart == quantum) nmin++;
130 while (size < minpart)
138 for (offs = pind = 0; offs < maxsize; pind++)
140 npar = (maxsize - offs + size - 1) / size;
141 if ((size < maxpart) && (npar > nmin))
145 d = d - (d + r - 1) / r;
146 if (cfft < d * cmac) npar = nmin;
148 _convlev [pind] = new Convlevel ();
149 _convlev [pind]->configure (prio, offs, npar, size, _options);
156 nmin = (s == 1) ? 2 : 6;
169 for (i = 0; i < ninp; i++) _inpbuff [i] = new float [_inpsize];
170 for (i = 0; i < nout; i++) _outbuff [i] = new float [_minpart];
175 return Converror::MEM_ALLOC;
183 int Convproc::impdata_create (uint32_t inp,
192 if (_state != ST_STOP) return Converror::BAD_STATE;
193 if ((inp >= _ninp) || (out >= _nout)) return Converror::BAD_PARAM;
196 for (j = 0; j < _nlevels; j++)
198 _convlev [j]->impdata_write (inp, out, step, data, ind0, ind1, true);
204 return Converror::MEM_ALLOC;
210 int Convproc::impdata_clear (uint32_t inp, uint32_t out)
214 if (_state < ST_STOP) return Converror::BAD_STATE;
215 for (k = 0; k < _nlevels; k++) _convlev [k]->impdata_clear (inp, out);
220 int Convproc::impdata_update (uint32_t inp,
229 if (_state < ST_STOP) return Converror::BAD_STATE;
230 if ((inp >= _ninp) || (out >= _nout)) return Converror::BAD_PARAM;
231 for (j = 0; j < _nlevels; j++)
233 _convlev [j]->impdata_write (inp, out, step, data, ind0, ind1, false);
239 int Convproc::impdata_link (uint32_t inp1,
246 if ((inp1 >= _ninp) || (out1 >= _nout)) return Converror::BAD_PARAM;
247 if ((inp2 >= _ninp) || (out2 >= _nout)) return Converror::BAD_PARAM;
248 if ((inp1 == inp2) && (out1 == out2)) return Converror::BAD_PARAM;
249 if (_state != ST_STOP) return Converror::BAD_STATE;
252 for (j = 0; j < _nlevels; j++)
254 _convlev [j]->impdata_link (inp1, out1, inp2, out2);
260 return Converror::MEM_ALLOC;
266 int Convproc::reset (void)
270 if (_state == ST_IDLE) return Converror::BAD_STATE;
271 for (k = 0; k < _ninp; k++) memset (_inpbuff [k], 0, _inpsize * sizeof (float));
272 for (k = 0; k < _nout; k++) memset (_outbuff [k], 0, _minpart * sizeof (float));
273 for (k = 0; k < _nlevels; k++) _convlev [k]->reset (_inpsize, _minpart, _inpbuff, _outbuff);
278 int Convproc::start_process (int abspri, int policy)
282 if (_state != ST_STOP) return Converror::BAD_STATE;
288 for (k = (_minpart == _quantum) ? 1 : 0; k < _nlevels; k++)
290 _convlev [k]->start (abspri, policy);
297 int Convproc::process (bool sync)
302 if (_state != ST_PROC) return 0;
303 _inpoffs += _quantum;
304 if (_inpoffs == _inpsize) _inpoffs = 0;
305 _outoffs += _quantum;
306 if (_outoffs == _minpart)
309 for (k = 0; k < _nout; k++) memset (_outbuff [k], 0, _minpart * sizeof (float));
310 for (k = 0; k < _nlevels; k++) f |= _convlev [k]->readout (sync, _skipcnt);
311 if (_skipcnt < _minpart) _skipcnt = 0;
312 else _skipcnt -= _minpart;
317 if (~_options & OPT_LATE_CONTIN) stop_process ();
327 int Convproc::stop_process (void)
331 if (_state != ST_PROC) return Converror::BAD_STATE;
332 for (k = 0; k < _nlevels; k++) _convlev [k]->stop ();
338 int Convproc::cleanup (void)
342 while (! check_stop ())
346 for (k = 0; k < _ninp; k++)
348 delete[] _inpbuff [k];
351 for (k = 0; k < _nout; k++)
353 delete[] _outbuff [k];
356 for (k = 0; k < _nlevels; k++)
376 bool Convproc::check_stop (void)
380 for (k = 0; (k < _nlevels) && (_convlev [k]->_stat == Convlevel::ST_IDLE); k++);
390 void Convproc::print (FILE *F)
394 for (k = 0; k < _nlevels; k++) _convlev [k]->print (F);
399 typedef float FV4 __attribute__ ((vector_size(16)));
402 Convlevel::Convlevel (void) :
420 Convlevel::~Convlevel (void)
426 void Convlevel::configure (int prio,
432 int fftwopt = (options & OPT_FFTW_MEASURE) ? FFTW_MEASURE : FFTW_ESTIMATE;
440 _time_data = calloc_real (2 * _parsize);
441 _prep_data = calloc_real (2 * _parsize);
442 _freq_data = calloc_complex (_parsize + 1);
443 _plan_r2c = fftwf_plan_dft_r2c_1d (2 * _parsize, _time_data, _freq_data, fftwopt);
444 _plan_c2r = fftwf_plan_dft_c2r_1d (2 * _parsize, _freq_data, _time_data, fftwopt);
445 if (_plan_r2c && _plan_c2r) return;
446 throw (Converror (Converror::MEM_ALLOC));
450 void Convlevel::impdata_write (uint32_t inp,
459 int32_t j, j0, j1, n;
466 i1 = i0 + _npar * _parsize;
467 if ((i0 >= n) || (i1 <= 0)) return;
471 M = findmacnode (inp, out, true);
472 if (M == 0 || M->_link) return;
473 if (M->_fftb == 0) M->alloc_fftb (_npar);
477 M = findmacnode (inp, out, false);
478 if (M == 0 || M->_link || M->_fftb == 0) return;
481 norm = 0.5f / _parsize;
482 for (k = 0; k < _npar; k++)
485 if ((i0 < n) && (i1 > 0))
488 if (fftb == 0 && create)
490 M->_fftb [k] = fftb = calloc_complex (_parsize + 1);
494 memset (_prep_data, 0, 2 * _parsize * sizeof (float));
495 j0 = (i0 < 0) ? 0 : i0;
496 j1 = (i1 > n) ? n : i1;
497 for (j = j0; j < j1; j++) _prep_data [j - i0] = norm * data [j * step];
498 fftwf_execute_dft_r2c (_plan_r2c, _prep_data, _freq_data);
499 #ifdef ENABLE_VECTOR_MODE
500 if (_options & OPT_VECTOR_MODE) fftswap (_freq_data);
502 for (j = 0; j <= (int)_parsize; j++)
504 fftb [j][0] += _freq_data [j][0];
505 fftb [j][1] += _freq_data [j][1];
514 void Convlevel::impdata_clear (uint32_t inp, uint32_t out)
519 M = findmacnode (inp, out, false);
520 if (M == 0 || M->_link || M->_fftb == 0) return;
521 for (i = 0; i < _npar; i++)
525 memset (M->_fftb [i], 0, (_parsize + 1) * sizeof (fftwf_complex));
531 void Convlevel::impdata_link (uint32_t inp1,
539 M1 = findmacnode (inp1, out1, false);
541 M2 = findmacnode (inp2, out2, true);
547 void Convlevel::reset (uint32_t inpsize,
560 for (X = _inp_list; X; X = X->_next)
562 for (i = 0; i < _npar; i++)
564 memset (X->_ffta [i], 0, (_parsize + 1) * sizeof (fftwf_complex));
567 for (Y = _out_list; Y; Y = Y->_next)
569 for (i = 0; i < 3; i++)
571 memset (Y->_buff [i], 0, _parsize * sizeof (float));
574 if (_parsize == _outsize)
581 _outoffs = _parsize / 2;
582 _inpoffs = _inpsize - _outoffs;
584 _bits = _parsize / _outsize;
593 void Convlevel::start (int abspri, int policy)
597 struct sched_param parm;
600 min = sched_get_priority_min (policy);
601 max = sched_get_priority_max (policy);
603 if (abspri > max) abspri = max;
604 if (abspri < min) abspri = min;
605 parm.sched_priority = abspri;
606 pthread_attr_init (&attr);
607 pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
608 pthread_attr_setschedpolicy (&attr, policy);
609 pthread_attr_setschedparam (&attr, &parm);
610 pthread_attr_setscope (&attr, PTHREAD_SCOPE_SYSTEM);
611 pthread_attr_setinheritsched (&attr, PTHREAD_EXPLICIT_SCHED);
612 pthread_attr_setstacksize (&attr, 0x10000);
613 pthread_create (&_pthr, &attr, static_main, this);
614 pthread_attr_destroy (&attr);
618 void Convlevel::stop (void)
620 if (_stat != ST_IDLE)
628 void Convlevel::cleanup (void)
659 fftwf_destroy_plan (_plan_r2c);
660 fftwf_destroy_plan (_plan_c2r);
661 fftwf_free (_time_data);
662 fftwf_free (_prep_data);
663 fftwf_free (_freq_data);
672 void *Convlevel::static_main (void *arg)
674 ((Convlevel *) arg)->main ();
679 void Convlevel::main (void)
685 if (_stat == ST_TERM)
697 void Convlevel::process (bool skip)
699 uint32_t i, i1, j, k, n1, n2, opi1, opi2;
712 if (_inpoffs >= _inpsize)
714 _inpoffs -= _inpsize;
719 opi1 = (_opind + 1) % 3;
720 opi2 = (_opind + 2) % 3;
722 for (X = _inp_list; X; X = X->_next)
724 inpd = _inpbuff [X->_inp];
725 if (n1) memcpy (_time_data, inpd + i1, n1 * sizeof (float));
726 if (n2) memcpy (_time_data + n1, inpd, n2 * sizeof (float));
727 memset (_time_data + _parsize, 0, _parsize * sizeof (float));
728 fftwf_execute_dft_r2c (_plan_r2c, _time_data, X->_ffta [_ptind]);
729 #ifdef ENABLE_VECTOR_MODE
730 if (_options & OPT_VECTOR_MODE) fftswap (X->_ffta [_ptind]);
736 for (Y = _out_list; Y; Y = Y->_next)
738 outd = Y->_buff [opi2];
739 memset (outd, 0, _parsize * sizeof (float));
744 for (Y = _out_list; Y; Y = Y->_next)
746 memset (_freq_data, 0, (_parsize + 1) * sizeof (fftwf_complex));
747 for (M = Y->_list; M; M = M->_next)
751 for (j = 0; j < _npar; j++)
754 fftb = M->_link ? M->_link->_fftb [j] : M->_fftb [j];
757 #ifdef ENABLE_VECTOR_MODE
758 if (_options & OPT_VECTOR_MODE)
760 FV4 *A = (FV4 *) ffta;
761 FV4 *B = (FV4 *) fftb;
762 FV4 *D = (FV4 *) _freq_data;
763 for (k = 0; k < _parsize; k += 4)
765 D [0] += A [0] * B [0] - A [1] * B [1];
766 D [1] += A [0] * B [1] + A [1] * B [0];
771 _freq_data [_parsize][0] += ffta [_parsize][0] * fftb [_parsize][0];
772 _freq_data [_parsize][1] = 0;
777 for (k = 0; k <= _parsize; k++)
779 _freq_data [k][0] += ffta [k][0] * fftb [k][0] - ffta [k][1] * fftb [k][1];
780 _freq_data [k][1] += ffta [k][0] * fftb [k][1] + ffta [k][1] * fftb [k][0];
784 if (i == 0) i = _npar;
789 #ifdef ENABLE_VECTOR_MODE
790 if (_options & OPT_VECTOR_MODE) fftswap (_freq_data);
792 fftwf_execute_dft_c2r (_plan_c2r, _freq_data, _time_data);
793 outd = Y->_buff [opi1];
794 for (k = 0; k < _parsize; k++) outd [k] += _time_data [k];
795 outd = Y->_buff [opi2];
796 memcpy (outd, _time_data + _parsize, _parsize * sizeof (float));
801 if (_ptind == _npar) _ptind = 0;
805 int Convlevel::readout (bool sync, uint32_t skipcnt)
811 _outoffs += _outsize;
812 if (_outoffs == _parsize)
815 if (_stat == ST_PROC)
819 if (sync) _done.wait ();
820 else if (_done.trywait ()) break;
823 if (++_opind == 3) _opind = 0;
829 process (skipcnt >= 2 * _parsize);
830 if (++_opind == 3) _opind = 0;
834 for (Y = _out_list; Y; Y = Y->_next)
836 p = Y->_buff [_opind] + _outoffs;
837 q = _outbuff [Y->_out];
838 for (i = 0; i < _outsize; i++) q [i] += p [i];
841 return (_wait > 1) ? _bits : 0;
845 void Convlevel::print (FILE *F)
847 fprintf (F, "prio = %4d, offs = %6d, parsize = %5d, npar = %3d\n", _prio, _offs, _parsize, _npar);
851 Macnode *Convlevel::findmacnode (uint32_t inp, uint32_t out, bool create)
857 for (X = _inp_list; X && (X->_inp != inp); X = X->_next);
860 if (! create) return 0;
861 X = new Inpnode (inp);
862 X->_next = _inp_list;
864 X->alloc_ffta (_npar, _parsize);
867 for (Y = _out_list; Y && (Y->_out != out); Y = Y->_next);
870 if (! create) return 0;
871 Y = new Outnode (out, _parsize);
872 Y->_next = _out_list;
876 for (M = Y->_list; M && (M->_inpn != X); M = M->_next);
879 if (! create) return 0;
889 #ifdef ENABLE_VECTOR_MODE
891 void Convlevel::fftswap (fftwf_complex *p)
893 uint32_t n = _parsize;
912 Inpnode::Inpnode (uint16_t inp):
921 Inpnode::~Inpnode (void)
927 void Inpnode::alloc_ffta (uint16_t npar, int32_t size)
930 _ffta = new fftwf_complex * [_npar];
931 for (int i = 0; i < _npar; i++)
933 _ffta [i] = calloc_complex (size + 1);
938 void Inpnode::free_ffta (void)
941 for (uint16_t i = 0; i < _npar; i++)
943 fftwf_free ( _ffta [i]);
951 Macnode::Macnode (Inpnode *inpn):
960 Macnode::~Macnode (void)
966 void Macnode::alloc_fftb (uint16_t npar)
969 _fftb = new fftwf_complex * [_npar];
970 for (uint16_t i = 0; i < _npar; i++)
977 void Macnode::free_fftb (void)
980 for (uint16_t i = 0; i < _npar; i++)
982 fftwf_free ( _fftb [i]);
990 Outnode::Outnode (uint16_t out, int32_t size):
995 _buff [0] = calloc_real (size);
996 _buff [1] = calloc_real (size);
997 _buff [2] = calloc_real (size);
1001 Outnode::~Outnode (void)
1003 fftwf_free (_buff [0]);
1004 fftwf_free (_buff [1]);
1005 fftwf_free (_buff [2]);