1 // ----------------------------------------------------------------------------
3 // Copyright (C) 2006-2018 Fons Adriaensen <fons@linuxaudio.org>
5 // This program is free software; you can redistribute it and/or modify
6 // it under the terms of the GNU General Public License as published by
7 // the Free Software Foundation; either version 3 of the License, or
8 // (at your option) any later version.
10 // This program is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 // GNU General Public License for more details.
15 // You should have received a copy of the GNU General Public License
16 // along with this program. If not, see <http://www.gnu.org/licenses/>.
18 // ----------------------------------------------------------------------------
26 #include "zita-convolver/zita-convolver.h"
29 int zita_convolver_major_version (void)
31 return ZITA_CONVOLVER_MAJOR_VERSION;
35 int zita_convolver_minor_version (void)
37 return ZITA_CONVOLVER_MINOR_VERSION;
41 float Convproc::_mac_cost = 1.0f;
42 float Convproc::_fft_cost = 5.0f;
45 static float *calloc_real (uint32_t k)
47 float *p = fftwf_alloc_real (k);
48 if (!p) throw (Converror (Converror::MEM_ALLOC));
49 memset (p, 0, k * sizeof (float));
53 static fftwf_complex *calloc_complex (uint32_t k)
55 fftwf_complex *p = fftwf_alloc_complex (k);
56 if (!p) throw (Converror (Converror::MEM_ALLOC));
57 memset (p, 0, k * sizeof (fftwf_complex));
62 Convproc::Convproc (void) :
74 memset (_inpbuff, 0, MAXINP * sizeof (float *));
75 memset (_outbuff, 0, MAXOUT * sizeof (float *));
76 memset (_convlev, 0, MAXLEV * sizeof (Convlevel *));
80 Convproc::~Convproc (void)
87 void Convproc::set_options (uint32_t options)
93 void Convproc::set_skipcnt (uint32_t skipcnt)
95 if ((_quantum == _minpart) && (_quantum == _maxpart)) _skipcnt = skipcnt;
99 int Convproc::configure (uint32_t ninp,
107 uint32_t offs, npar, size, pind, nmin, i;
108 int prio, step, d, r, s;
111 if (_state != ST_IDLE) return Converror::BAD_STATE;
112 if ( (ninp < 1) || (ninp > MAXINP)
113 || (nout < 1) || (nout > MAXOUT)
114 || (quantum & (quantum - 1))
115 || (quantum < MINQUANT)
116 || (quantum > MAXQUANT)
117 || (minpart & (minpart - 1))
118 || (minpart < MINPART)
119 || (minpart < quantum)
120 || (minpart > MAXDIVIS * quantum)
121 || (maxpart & (maxpart - 1))
122 || (maxpart > MAXPART)
123 || (maxpart < minpart)) return Converror::BAD_PARAM;
125 nmin = (ninp < nout) ? ninp : nout;
126 if (density <= 0.0f) density = 1.0f / nmin;
127 if (density > 1.0f) density = 1.0f;
128 cfft = _fft_cost * (ninp + nout);
129 cmac = _mac_cost * ninp * nout * density;
130 step = (cfft < 4 * cmac) ? 1 : 2;
133 r = maxpart / minpart;
134 s = (r & 0xAAAA) ? 1 : 2;
137 nmin = (s == 1) ? 2 : 6;
138 if (minpart == quantum) nmin++;
141 while (size < minpart)
149 for (offs = pind = 0; offs < maxsize; pind++)
151 npar = (maxsize - offs + size - 1) / size;
152 if ((size < maxpart) && (npar > nmin))
156 d = d - (d + r - 1) / r;
157 if (cfft < d * cmac) npar = nmin;
159 _convlev [pind] = new Convlevel ();
160 _convlev [pind]->configure (prio, offs, npar, size, _options);
167 nmin = (s == 1) ? 2 : 6;
180 for (i = 0; i < ninp; i++) _inpbuff [i] = new float [_inpsize];
181 for (i = 0; i < nout; i++) _outbuff [i] = new float [_minpart];
186 return Converror::MEM_ALLOC;
194 int Convproc::impdata_create (uint32_t inp,
203 if (_state != ST_STOP) return Converror::BAD_STATE;
204 if ((inp >= _ninp) || (out >= _nout)) return Converror::BAD_PARAM;
207 for (j = 0; j < _nlevels; j++)
209 _convlev [j]->impdata_write (inp, out, step, data, ind0, ind1, true);
215 return Converror::MEM_ALLOC;
221 int Convproc::impdata_clear (uint32_t inp, uint32_t out)
225 if (_state < ST_STOP) return Converror::BAD_STATE;
226 for (k = 0; k < _nlevels; k++) _convlev [k]->impdata_clear (inp, out);
231 int Convproc::impdata_update (uint32_t inp,
240 if (_state < ST_STOP) return Converror::BAD_STATE;
241 if ((inp >= _ninp) || (out >= _nout)) return Converror::BAD_PARAM;
242 for (j = 0; j < _nlevels; j++)
244 _convlev [j]->impdata_write (inp, out, step, data, ind0, ind1, false);
250 int Convproc::impdata_link (uint32_t inp1,
257 if ((inp1 >= _ninp) || (out1 >= _nout)) return Converror::BAD_PARAM;
258 if ((inp2 >= _ninp) || (out2 >= _nout)) return Converror::BAD_PARAM;
259 if ((inp1 == inp2) && (out1 == out2)) return Converror::BAD_PARAM;
260 if (_state != ST_STOP) return Converror::BAD_STATE;
263 for (j = 0; j < _nlevels; j++)
265 _convlev [j]->impdata_link (inp1, out1, inp2, out2);
271 return Converror::MEM_ALLOC;
277 int Convproc::reset (void)
281 if (_state == ST_IDLE) return Converror::BAD_STATE;
282 for (k = 0; k < _ninp; k++) memset (_inpbuff [k], 0, _inpsize * sizeof (float));
283 for (k = 0; k < _nout; k++) memset (_outbuff [k], 0, _minpart * sizeof (float));
284 for (k = 0; k < _nlevels; k++) _convlev [k]->reset (_inpsize, _minpart, _inpbuff, _outbuff);
289 int Convproc::start_process (int abspri, int policy)
293 if (_state != ST_STOP) return Converror::BAD_STATE;
299 for (k = (_minpart == _quantum) ? 1 : 0; k < _nlevels; k++)
301 _convlev [k]->start (abspri, policy);
308 int Convproc::process (bool sync)
313 if (_state != ST_PROC) return 0;
314 _inpoffs += _quantum;
315 if (_inpoffs == _inpsize) _inpoffs = 0;
316 _outoffs += _quantum;
317 if (_outoffs == _minpart)
320 for (k = 0; k < _nout; k++) memset (_outbuff [k], 0, _minpart * sizeof (float));
321 for (k = 0; k < _nlevels; k++) f |= _convlev [k]->readout (sync, _skipcnt);
322 if (_skipcnt < _minpart) _skipcnt = 0;
323 else _skipcnt -= _minpart;
328 if (~_options & OPT_LATE_CONTIN) stop_process ();
338 int Convproc::stop_process (void)
342 if (_state != ST_PROC) return Converror::BAD_STATE;
343 for (k = 0; k < _nlevels; k++) _convlev [k]->stop ();
349 int Convproc::cleanup (void)
353 while (! check_stop ())
357 for (k = 0; k < _ninp; k++)
359 delete[] _inpbuff [k];
362 for (k = 0; k < _nout; k++)
364 delete[] _outbuff [k];
367 for (k = 0; k < _nlevels; k++)
387 bool Convproc::check_stop (void)
391 for (k = 0; (k < _nlevels) && (_convlev [k]->_stat == Convlevel::ST_IDLE); k++);
401 void Convproc::print (FILE *F)
405 for (k = 0; k < _nlevels; k++) _convlev [k]->print (F);
410 typedef float FV4 __attribute__ ((vector_size(16)));
413 Convlevel::Convlevel (void) :
431 Convlevel::~Convlevel (void)
437 void Convlevel::configure (int prio,
443 int fftwopt = (options & OPT_FFTW_MEASURE) ? FFTW_MEASURE : FFTW_ESTIMATE;
451 _time_data = calloc_real (2 * _parsize);
452 _prep_data = calloc_real (2 * _parsize);
453 _freq_data = calloc_complex (_parsize + 1);
454 _plan_r2c = fftwf_plan_dft_r2c_1d (2 * _parsize, _time_data, _freq_data, fftwopt);
455 _plan_c2r = fftwf_plan_dft_c2r_1d (2 * _parsize, _freq_data, _time_data, fftwopt);
456 if (_plan_r2c && _plan_c2r) return;
457 throw (Converror (Converror::MEM_ALLOC));
461 void Convlevel::impdata_write (uint32_t inp,
470 int32_t j, j0, j1, n;
477 i1 = i0 + _npar * _parsize;
478 if ((i0 >= n) || (i1 <= 0)) return;
482 M = findmacnode (inp, out, true);
483 if (M == 0 || M->_link) return;
484 if (M->_fftb == 0) M->alloc_fftb (_npar);
488 M = findmacnode (inp, out, false);
489 if (M == 0 || M->_link || M->_fftb == 0) return;
492 norm = 0.5f / _parsize;
493 for (k = 0; k < _npar; k++)
496 if ((i0 < n) && (i1 > 0))
499 if (fftb == 0 && create)
501 M->_fftb [k] = fftb = calloc_complex (_parsize + 1);
505 memset (_prep_data, 0, 2 * _parsize * sizeof (float));
506 j0 = (i0 < 0) ? 0 : i0;
507 j1 = (i1 > n) ? n : i1;
508 for (j = j0; j < j1; j++) _prep_data [j - i0] = norm * data [j * step];
509 fftwf_execute_dft_r2c (_plan_r2c, _prep_data, _freq_data);
510 #ifdef ENABLE_VECTOR_MODE
511 if (_options & OPT_VECTOR_MODE) fftswap (_freq_data);
513 for (j = 0; j <= (int)_parsize; j++)
515 fftb [j][0] += _freq_data [j][0];
516 fftb [j][1] += _freq_data [j][1];
525 void Convlevel::impdata_clear (uint32_t inp, uint32_t out)
530 M = findmacnode (inp, out, false);
531 if (M == 0 || M->_link || M->_fftb == 0) return;
532 for (i = 0; i < _npar; i++)
536 memset (M->_fftb [i], 0, (_parsize + 1) * sizeof (fftwf_complex));
542 void Convlevel::impdata_link (uint32_t inp1,
550 M1 = findmacnode (inp1, out1, false);
552 M2 = findmacnode (inp2, out2, true);
558 void Convlevel::reset (uint32_t inpsize,
571 for (X = _inp_list; X; X = X->_next)
573 for (i = 0; i < _npar; i++)
575 memset (X->_ffta [i], 0, (_parsize + 1) * sizeof (fftwf_complex));
578 for (Y = _out_list; Y; Y = Y->_next)
580 for (i = 0; i < 3; i++)
582 memset (Y->_buff [i], 0, _parsize * sizeof (float));
585 if (_parsize == _outsize)
592 _outoffs = _parsize / 2;
593 _inpoffs = _inpsize - _outoffs;
595 _bits = _parsize / _outsize;
604 void Convlevel::start (int abspri, int policy)
608 struct sched_param parm;
611 min = sched_get_priority_min (policy);
612 max = sched_get_priority_max (policy);
614 if (abspri > max) abspri = max;
615 if (abspri < min) abspri = min;
616 parm.sched_priority = abspri;
617 pthread_attr_init (&attr);
618 pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
619 pthread_attr_setschedpolicy (&attr, policy);
620 pthread_attr_setschedparam (&attr, &parm);
621 pthread_attr_setscope (&attr, PTHREAD_SCOPE_SYSTEM);
622 pthread_attr_setinheritsched (&attr, PTHREAD_EXPLICIT_SCHED);
623 pthread_attr_setstacksize (&attr, 0x10000);
624 pthread_create (&_pthr, &attr, static_main, this);
625 pthread_attr_destroy (&attr);
629 void Convlevel::stop (void)
631 if (_stat != ST_IDLE)
639 void Convlevel::cleanup (void)
670 fftwf_destroy_plan (_plan_r2c);
671 fftwf_destroy_plan (_plan_c2r);
672 fftwf_free (_time_data);
673 fftwf_free (_prep_data);
674 fftwf_free (_freq_data);
683 void *Convlevel::static_main (void *arg)
685 ((Convlevel *) arg)->main ();
690 void Convlevel::main (void)
696 if (_stat == ST_TERM)
708 void Convlevel::process (bool skip)
710 uint32_t i, i1, j, k, n1, n2, opi1, opi2;
723 if (_inpoffs >= _inpsize)
725 _inpoffs -= _inpsize;
730 opi1 = (_opind + 1) % 3;
731 opi2 = (_opind + 2) % 3;
733 for (X = _inp_list; X; X = X->_next)
735 inpd = _inpbuff [X->_inp];
736 if (n1) memcpy (_time_data, inpd + i1, n1 * sizeof (float));
737 if (n2) memcpy (_time_data + n1, inpd, n2 * sizeof (float));
738 memset (_time_data + _parsize, 0, _parsize * sizeof (float));
739 fftwf_execute_dft_r2c (_plan_r2c, _time_data, X->_ffta [_ptind]);
740 #ifdef ENABLE_VECTOR_MODE
741 if (_options & OPT_VECTOR_MODE) fftswap (X->_ffta [_ptind]);
747 for (Y = _out_list; Y; Y = Y->_next)
749 outd = Y->_buff [opi2];
750 memset (outd, 0, _parsize * sizeof (float));
755 for (Y = _out_list; Y; Y = Y->_next)
757 memset (_freq_data, 0, (_parsize + 1) * sizeof (fftwf_complex));
758 for (M = Y->_list; M; M = M->_next)
762 for (j = 0; j < _npar; j++)
765 fftb = M->_link ? M->_link->_fftb [j] : M->_fftb [j];
768 #ifdef ENABLE_VECTOR_MODE
769 if (_options & OPT_VECTOR_MODE)
771 FV4 *A = (FV4 *) ffta;
772 FV4 *B = (FV4 *) fftb;
773 FV4 *D = (FV4 *) _freq_data;
774 for (k = 0; k < _parsize; k += 4)
776 D [0] += A [0] * B [0] - A [1] * B [1];
777 D [1] += A [0] * B [1] + A [1] * B [0];
782 _freq_data [_parsize][0] += ffta [_parsize][0] * fftb [_parsize][0];
783 _freq_data [_parsize][1] = 0;
788 for (k = 0; k <= _parsize; k++)
790 _freq_data [k][0] += ffta [k][0] * fftb [k][0] - ffta [k][1] * fftb [k][1];
791 _freq_data [k][1] += ffta [k][0] * fftb [k][1] + ffta [k][1] * fftb [k][0];
795 if (i == 0) i = _npar;
800 #ifdef ENABLE_VECTOR_MODE
801 if (_options & OPT_VECTOR_MODE) fftswap (_freq_data);
803 fftwf_execute_dft_c2r (_plan_c2r, _freq_data, _time_data);
804 outd = Y->_buff [opi1];
805 for (k = 0; k < _parsize; k++) outd [k] += _time_data [k];
806 outd = Y->_buff [opi2];
807 memcpy (outd, _time_data + _parsize, _parsize * sizeof (float));
812 if (_ptind == _npar) _ptind = 0;
816 int Convlevel::readout (bool sync, uint32_t skipcnt)
822 _outoffs += _outsize;
823 if (_outoffs == _parsize)
826 if (_stat == ST_PROC)
830 if (sync) _done.wait ();
831 else if (_done.trywait ()) break;
834 if (++_opind == 3) _opind = 0;
840 process (skipcnt >= 2 * _parsize);
841 if (++_opind == 3) _opind = 0;
845 for (Y = _out_list; Y; Y = Y->_next)
847 p = Y->_buff [_opind] + _outoffs;
848 q = _outbuff [Y->_out];
849 for (i = 0; i < _outsize; i++) q [i] += p [i];
852 return (_wait > 1) ? _bits : 0;
856 void Convlevel::print (FILE *F)
858 fprintf (F, "prio = %4d, offs = %6d, parsize = %5d, npar = %3d\n", _prio, _offs, _parsize, _npar);
862 Macnode *Convlevel::findmacnode (uint32_t inp, uint32_t out, bool create)
868 for (X = _inp_list; X && (X->_inp != inp); X = X->_next);
871 if (! create) return 0;
872 X = new Inpnode (inp);
873 X->_next = _inp_list;
875 X->alloc_ffta (_npar, _parsize);
878 for (Y = _out_list; Y && (Y->_out != out); Y = Y->_next);
881 if (! create) return 0;
882 Y = new Outnode (out, _parsize);
883 Y->_next = _out_list;
887 for (M = Y->_list; M && (M->_inpn != X); M = M->_next);
890 if (! create) return 0;
900 #ifdef ENABLE_VECTOR_MODE
902 void Convlevel::fftswap (fftwf_complex *p)
904 uint32_t n = _parsize;
923 Inpnode::Inpnode (uint16_t inp):
932 Inpnode::~Inpnode (void)
938 void Inpnode::alloc_ffta (uint16_t npar, int32_t size)
941 _ffta = new fftwf_complex * [_npar];
942 for (int i = 0; i < _npar; i++)
944 _ffta [i] = calloc_complex (size + 1);
949 void Inpnode::free_ffta (void)
952 for (uint16_t i = 0; i < _npar; i++)
954 fftwf_free ( _ffta [i]);
962 Macnode::Macnode (Inpnode *inpn):
971 Macnode::~Macnode (void)
977 void Macnode::alloc_fftb (uint16_t npar)
980 _fftb = new fftwf_complex * [_npar];
981 for (uint16_t i = 0; i < _npar; i++)
988 void Macnode::free_fftb (void)
991 for (uint16_t i = 0; i < _npar; i++)
993 fftwf_free ( _fftb [i]);
1001 Outnode::Outnode (uint16_t out, int32_t size):
1006 _buff [0] = calloc_real (size);
1007 _buff [1] = calloc_real (size);
1008 _buff [2] = calloc_real (size);
1012 Outnode::~Outnode (void)
1014 fftwf_free (_buff [0]);
1015 fftwf_free (_buff [1]);
1016 fftwf_free (_buff [2]);