Merge branch 'intel_ipp'

author Paul Brossier <piem@piem.org>

Sun, 1 Oct 2017 18:04:30 +0000 (20:04 +0200)

committer Paul Brossier <piem@piem.org>

Sun, 1 Oct 2017 18:04:30 +0000 (20:04 +0200)
author Paul Brossier <piem@piem.org>
Sun, 1 Oct 2017 18:04:30 +0000 (20:04 +0200)
committer Paul Brossier <piem@piem.org>
Sun, 1 Oct 2017 18:04:30 +0000 (20:04 +0200)
diff --git a/src/aubio_priv.h b/src/aubio_priv.h

index bc0ce9c..65a3d8a 100644 (file)
--- a/src/aubio_priv.h
+++ b/src/aubio_priv.h
@@ -123,7 +123,38 @@
  #endif /* HAVE_AUBIO_DOUBLE */
  #endif /* HAVE_ATLAS */
  
-#if !defined(HAVE_MEMCPY_HACKS) && !defined(HAVE_ACCELERATE) && !defined(HAVE_ATLAS)
+#if defined HAVE_INTEL_IPP
+#include <ippcore.h>
+#include <ippvm.h>
+#include <ipps.h>
+#ifndef HAVE_AUBIO_DOUBLE
+#define aubio_ippsSet         ippsSet_32f
+#define aubio_ippsZero        ippsZero_32f
+#define aubio_ippsCopy        ippsCopy_32f
+#define aubio_ippsMul         ippsMul_32f
+#define aubio_ippsMulC        ippsMulC_32f
+#define aubio_ippsAddC        ippsAddC_32f
+#define aubio_ippsLn          ippsLn_32f_A21
+#define aubio_ippsMean(a,b,c) ippsMean_32f(a, b, c, ippAlgHintFast)
+#define aubio_ippsSum(a,b,c)  ippsSum_32f(a, b, c, ippAlgHintFast)
+#define aubio_ippsMax         ippsMax_32f
+#define aubio_ippsMin         ippsMin_32f
+#else /* HAVE_AUBIO_DOUBLE */
+#define aubio_ippsSet         ippsSet_64f
+#define aubio_ippsZero        ippsZero_64f
+#define aubio_ippsCopy        ippsCopy_64f
+#define aubio_ippsMul         ippsMul_64f
+#define aubio_ippsMulC        ippsMulC_64f
+#define aubio_ippsAddC        ippsAddC_64f
+#define aubio_ippsLn          ippsLn_64f_A26
+#define aubio_ippsMean        ippsMean_64f
+#define aubio_ippsSum         ippsSum_64f
+#define aubio_ippsMax         ippsMax_64f
+#define aubio_ippsMin         ippsMin_64f
+#endif /* HAVE_AUBIO_DOUBLE */
+#endif
+
+#if !defined(HAVE_MEMCPY_HACKS) && !defined(HAVE_ACCELERATE) && !defined(HAVE_ATLAS) && !defined(HAVE_INTEL_IPP)
  #define HAVE_NOOPT 1
  #else
  #undef HAVE_NOOPT
diff --git a/src/cvec.c b/src/cvec.c

index 4ed2a43..76755da 100644 (file)
--- a/src/cvec.c
+++ b/src/cvec.c
@@ -85,31 +85,40 @@ void cvec_copy(const cvec_t *s, cvec_t *t) {
          s->length, t->length);
      return;
    }
-#ifdef HAVE_MEMCPY_HACKS
+#if defined(HAVE_INTEL_IPP)
+  aubio_ippsCopy(s->phas, t->phas, (int)s->length);
+  aubio_ippsCopy(s->norm, t->norm, (int)s->length);
+#elif defined(HAVE_MEMCPY_HACKS)
    memcpy(t->norm, s->norm, t->length * sizeof(smpl_t));
    memcpy(t->phas, s->phas, t->length * sizeof(smpl_t));
-#else /* HAVE_MEMCPY_HACKS */
+#else
    uint_t j;
    for (j=0; j< t->length; j++) {
      t->norm[j] = s->norm[j];
      t->phas[j] = s->phas[j];
    }
-#endif /* HAVE_MEMCPY_HACKS */
+#endif
  }
  
-void cvec_norm_set_all (cvec_t *s, smpl_t val) {
+void cvec_norm_set_all(cvec_t *s, smpl_t val) {
+#if defined(HAVE_INTEL_IPP)
+  aubio_ippsSet(val, s->norm, (int)s->length);
+#else
    uint_t j;
    for (j=0; j< s->length; j++) {
      s->norm[j] = val;
    }
+#endif
  }
  
  void cvec_norm_zeros(cvec_t *s) {
-#ifdef HAVE_MEMCPY_HACKS
+#if defined(HAVE_INTEL_IPP)
+  aubio_ippsZero(s->norm, (int)s->length);
+#elif defined(HAVE_MEMCPY_HACKS)
    memset(s->norm, 0, s->length * sizeof(smpl_t));
-#else /* HAVE_MEMCPY_HACKS */
+#else
    cvec_norm_set_all (s, 0.);
-#endif /* HAVE_MEMCPY_HACKS */
+#endif
  }
  
  void cvec_norm_ones(cvec_t *s) {
@@ -117,14 +126,20 @@ void cvec_norm_ones(cvec_t *s) {
  }
  
  void cvec_phas_set_all (cvec_t *s, smpl_t val) {
+#if defined(HAVE_INTEL_IPP)
+  aubio_ippsSet(val, s->phas, (int)s->length);
+#else
    uint_t j;
    for (j=0; j< s->length; j++) {
      s->phas[j] = val;
    }
+#endif
  }
  
  void cvec_phas_zeros(cvec_t *s) {
-#ifdef HAVE_MEMCPY_HACKS
+#if defined(HAVE_INTEL_IPP)
+  aubio_ippsZero(s->phas, (int)s->length);
+#elif defined(HAVE_MEMCPY_HACKS)
    memset(s->phas, 0, s->length * sizeof(smpl_t));
  #else
    cvec_phas_set_all (s, 0.);
@@ -141,8 +156,14 @@ void cvec_zeros(cvec_t *s) {
  }
  
  void cvec_logmag(cvec_t *s, smpl_t lambda) {
-  uint_t j;
-  for (j=0; j< s->length; j++) {
-    s->norm[j] = LOG(lambda * s->norm[j] + 1);
-  }
+  #if defined(HAVE_INTEL_IPP)
+    aubio_ippsMulC(s->norm, lambda, s->norm, (int)s->length);
+    aubio_ippsAddC(s->norm, 1.0, s->norm, (int)s->length);
+    aubio_ippsLn(s->norm, s->norm, (int)s->length);
+  #else
+    uint_t j;
+    for (j=0; j< s->length; j++) {
+      s->norm[j] = LOG(lambda * s->norm[j] + 1);
+    }
+  #endif
  }
diff --git a/src/fvec.c b/src/fvec.c

index 3961d74..d356f81 100644 (file)
--- a/src/fvec.c
+++ b/src/fvec.c
@@ -60,27 +60,30 @@ void fvec_print(const fvec_t *s) {
  }
  
  void fvec_set_all (fvec_t *s, smpl_t val) {
-#if !defined(HAVE_ACCELERATE) && !defined(HAVE_ATLAS)
-  uint_t j;
-  for (j=0; j< s->length; j++) {
-    s->data[j] = val;
-  }
+#if defined(HAVE_INTEL_IPP)
+  aubio_ippsSet(val, s->data, (int)s->length);
  #elif defined(HAVE_ATLAS)
    aubio_catlas_set(s->length, val, s->data, 1);
  #elif defined(HAVE_ACCELERATE)
    aubio_vDSP_vfill(&val, s->data, 1, s->length);
+#else
+  uint_t j;
+  for ( j = 0; j< s->length; j++ )
+  {
+    s->data[j] = val;
+  }
  #endif
  }
  
  void fvec_zeros(fvec_t *s) {
-#if !defined(HAVE_MEMCPY_HACKS) && !defined(HAVE_ACCELERATE)
-  fvec_set_all (s, 0.);
-#else
-#if defined(HAVE_MEMCPY_HACKS)
+#if defined(HAVE_INTEL_IPP)
+  aubio_ippsZero(s->data, (int)s->length);
+#elif defined(HAVE_ACCELERATE)
+  aubio_vDSP_vclr(s->data, 1, s->length);
+#elif defined(HAVE_MEMCPY_HACKS)
    memset(s->data, 0, s->length * sizeof(smpl_t));
  #else
-  aubio_vDSP_vclr(s->data, 1, s->length);
-#endif
+  fvec_set_all(s, 0.);
  #endif
  }
  
@@ -96,27 +99,31 @@ void fvec_rev(fvec_t *s) {
  }
  
  void fvec_weight(fvec_t *s, const fvec_t *weight) {
-#ifndef HAVE_ACCELERATE
-  uint_t j;
    uint_t length = MIN(s->length, weight->length);
-  for (j=0; j< length; j++) {
+#if defined(HAVE_INTEL_IPP)
+  aubio_ippsMul(s->data, weight->data, s->data, (int)length);
+#elif defined(HAVE_ACCELERATE)
+  aubio_vDSP_vmul( s->data, 1, weight->data, 1, s->data, 1, length );
+#else
+  uint_t j;
+  for (j = 0; j < length; j++) {
      s->data[j] *= weight->data[j];
    }
-#else
-  aubio_vDSP_vmul(s->data, 1, weight->data, 1, s->data, 1, s->length);
  #endif /* HAVE_ACCELERATE */
  }
  
  void fvec_weighted_copy(const fvec_t *in, const fvec_t *weight, fvec_t *out) {
-#ifndef HAVE_ACCELERATE
+  uint_t length = MIN(in->length, MIN(out->length, weight->length));
+#if defined(HAVE_INTEL_IPP)
+  aubio_ippsMul(in->data, weight->data, out->data, (int)length);
+#elif defined(HAVE_ACCELERATE)
+  aubio_vDSP_vmul(in->data, 1, weight->data, 1, out->data, 1, length);
+#else
    uint_t j;
-  uint_t length = MIN(out->length, weight->length);
-  for (j=0; j< length; j++) {
+  for (j = 0; j < length; j++) {
      out->data[j] = in->data[j] * weight->data[j];
    }
-#else
-  aubio_vDSP_vmul(in->data, 1, weight->data, 1, out->data, 1, out->length);
-#endif /* HAVE_ACCELERATE */
+#endif
  }
  
  void fvec_copy(const fvec_t *s, fvec_t *t) {
@@ -125,16 +132,18 @@ void fvec_copy(const fvec_t *s, fvec_t *t) {
          s->length, t->length);
      return;
    }
-#ifdef HAVE_NOOPT
-  uint_t j;
-  for (j=0; j< t->length; j++) {
-    t->data[j] = s->data[j];
-  }
-#elif defined(HAVE_MEMCPY_HACKS)
-  memcpy(t->data, s->data, t->length * sizeof(smpl_t));
+#if defined(HAVE_INTEL_IPP)
+  aubio_ippsCopy(s->data, t->data, (int)s->length);
  #elif defined(HAVE_ATLAS)
    aubio_cblas_copy(s->length, s->data, 1, t->data, 1);
  #elif defined(HAVE_ACCELERATE)
    aubio_vDSP_mmov(s->data, t->data, 1, s->length, 1, 1);
+#elif defined(HAVE_MEMCPY_HACKS)
+  memcpy(t->data, s->data, t->length * sizeof(smpl_t));
+#else
+  uint_t j;
+  for (j = 0; j < t->length; j++) {
+    t->data[j] = s->data[j];
+  }
  #endif
  }
diff --git a/src/mathutils.c b/src/mathutils.c

index 6a55fce..ef7b0de 100644 (file)
--- a/src/mathutils.c
+++ b/src/mathutils.c
@@ -159,45 +159,53 @@ smpl_t
  fvec_mean (fvec_t * s)
  {
    smpl_t tmp = 0.0;
-#ifndef HAVE_ACCELERATE
+#if defined(HAVE_INTEL_IPP)
+  aubio_ippsMean(s->data, (int)s->length, &tmp);
+  return tmp;
+#elif defined(HAVE_ACCELERATE)
+  aubio_vDSP_meanv(s->data, 1, &tmp, s->length);
+  return tmp;
+#else
    uint_t j;
    for (j = 0; j < s->length; j++) {
      tmp += s->data[j];
    }
-  return tmp / (smpl_t) (s->length);
-#else
-  aubio_vDSP_meanv(s->data, 1, &tmp, s->length);
-  return tmp;
-#endif /* HAVE_ACCELERATE */
+  return tmp / (smpl_t)(s->length);
+#endif
  }
  
  smpl_t
  fvec_sum (fvec_t * s)
  {
    smpl_t tmp = 0.0;
-#ifndef HAVE_ACCELERATE
+#if defined(HAVE_INTEL_IPP)
+  aubio_ippsSum(s->data, (int)s->length, &tmp);
+#elif defined(HAVE_ACCELERATE)
+  aubio_vDSP_sve(s->data, 1, &tmp, s->length);
+#else
    uint_t j;
    for (j = 0; j < s->length; j++) {
      tmp += s->data[j];
    }
-#else
-  aubio_vDSP_sve(s->data, 1, &tmp, s->length);
-#endif /* HAVE_ACCELERATE */
+#endif
    return tmp;
  }
  
  smpl_t
  fvec_max (fvec_t * s)
  {
-#ifndef HAVE_ACCELERATE
+#if defined(HAVE_INTEL_IPP)
+  smpl_t tmp = 0.;
+  aubio_ippsMax( s->data, (int)s->length, &tmp);
+#elif defined(HAVE_ACCELERATE)
+  smpl_t tmp = 0.;
+  aubio_vDSP_maxv( s->data, 1, &tmp, s->length );
+#else
    uint_t j;
-  smpl_t tmp = 0.0;
-  for (j = 0; j < s->length; j++) {
+  smpl_t tmp = s->data[0];
+  for (j = 1; j < s->length; j++) {
      tmp = (tmp > s->data[j]) ? tmp : s->data[j];
    }
-#else
-  smpl_t tmp = 0.;
-  aubio_vDSP_maxv(s->data, 1, &tmp, s->length);
  #endif
    return tmp;
  }
@@ -205,15 +213,18 @@ fvec_max (fvec_t * s)
  smpl_t
  fvec_min (fvec_t * s)
  {
-#ifndef HAVE_ACCELERATE
+#if defined(HAVE_INTEL_IPP)
+  smpl_t tmp = 0.;
+  aubio_ippsMin(s->data, (int)s->length, &tmp);
+#elif defined(HAVE_ACCELERATE)
+  smpl_t tmp = 0.;
+  aubio_vDSP_minv(s->data, 1, &tmp, s->length);
+#else
    uint_t j;
    smpl_t tmp = s->data[0];
-  for (j = 0; j < s->length; j++) {
+  for (j = 1; j < s->length; j++) {
      tmp = (tmp < s->data[j]) ? tmp : s->data[j];
    }
-#else
-  smpl_t tmp = 0.;
-  aubio_vDSP_minv(s->data, 1, &tmp, s->length);
  #endif
    return tmp;
  }
@@ -574,6 +585,17 @@ aubio_next_power_of_two (uint_t a)
    return i;
  }
  
+uint_t
+aubio_power_of_two_order (uint_t a)
+{
+  int order = 0;
+  int temp = aubio_next_power_of_two(a);
+  while (temp >>= 1) {
+    ++order;
+  }
+  return order;
+}
+
  smpl_t
  aubio_db_spl (const fvec_t * o)
  {
diff --git a/src/mathutils.h b/src/mathutils.h

index 7bef5a4..56e52ea 100644 (file)
--- a/src/mathutils.h
+++ b/src/mathutils.h
@@ -312,6 +312,9 @@ uint_t aubio_is_power_of_two(uint_t a);
  /** return the next power of power of 2 greater than a */
  uint_t aubio_next_power_of_two(uint_t a);
  
+/** return the log2 factor of the given power of 2 value a */
+uint_t aubio_power_of_two_order(uint_t a);
+
  /** compute normalised autocorrelation function
  
    \param input vector to compute autocorrelation from
diff --git a/src/spectral/fft.c b/src/spectral/fft.c

index a6bd902..0ca467c 100644 (file)
--- a/src/spectral/fft.c
+++ b/src/spectral/fft.c
@@ -77,8 +77,7 @@ typedef FFTW_TYPE fft_data_t;
  // a global mutex for FFTW thread safety
  pthread_mutex_t aubio_fftw_mutex = PTHREAD_MUTEX_INITIALIZER;
  
-#else
-#ifdef HAVE_ACCELERATE        // using ACCELERATE
+#elif defined HAVE_ACCELERATE        // using ACCELERATE
  // https://developer.apple.com/library/mac/#documentation/Accelerate/Reference/vDSPRef/Reference/reference.html
  #include <Accelerate/Accelerate.h>
  
@@ -112,32 +111,65 @@ pthread_mutex_t aubio_fftw_mutex = PTHREAD_MUTEX_INITIALIZER;
  #define aubio_vvsqrt                   vvsqrt
  #endif /* HAVE_AUBIO_DOUBLE */
  
-#else                         // using OOURA
+#elif defined HAVE_INTEL_IPP // using INTEL IPP
+
+#if !HAVE_AUBIO_DOUBLE
+#define aubio_IppFloat                 Ipp32f
+#define aubio_IppComplex               Ipp32fc
+#define aubio_FFTSpec                  FFTSpec_R_32f
+#define aubio_ippsMalloc_complex       ippsMalloc_32fc
+#define aubio_ippsFFTInit_R            ippsFFTInit_R_32f
+#define aubio_ippsFFTGetSize_R         ippsFFTGetSize_R_32f
+#define aubio_ippsFFTInv_CCSToR        ippsFFTInv_CCSToR_32f
+#define aubio_ippsFFTFwd_RToCCS        ippsFFTFwd_RToCCS_32f
+#define aubio_ippsAtan2                ippsAtan2_32f_A21
+#else /* HAVE_AUBIO_DOUBLE */
+#define aubio_IppFloat                 Ipp64f
+#define aubio_IppComplex               Ipp64fc
+#define aubio_FFTSpec                  FFTSpec_R_64f
+#define aubio_ippsMalloc_complex       ippsMalloc_64fc
+#define aubio_ippsFFTInit_R            ippsFFTInit_R_64f
+#define aubio_ippsFFTGetSize_R         ippsFFTGetSize_R_64f
+#define aubio_ippsFFTInv_CCSToR        ippsFFTInv_CCSToR_64f
+#define aubio_ippsFFTFwd_RToCCS        ippsFFTFwd_RToCCS_64f
+#define aubio_ippsAtan2                ippsAtan2_64f_A50
+#endif
+
+
+#else // using OOURA
  // let's use ooura instead
  extern void aubio_ooura_rdft(int, int, smpl_t *, int *, smpl_t *);
  
-#endif /* HAVE_ACCELERATE */
-#endif /* HAVE_FFTW3 */
+#endif
  
  struct _aubio_fft_t {
    uint_t winsize;
    uint_t fft_size;
+
  #ifdef HAVE_FFTW3             // using FFTW3
    real_t *in, *out;
    fftw_plan pfw, pbw;
-  fft_data_t * specdata;      /* complex spectral data */
-#else
-#ifdef HAVE_ACCELERATE        // using ACCELERATE
+  fft_data_t * specdata; /* complex spectral data */
+
+#elif defined HAVE_ACCELERATE  // using ACCELERATE
    int log2fftsize;
    aubio_FFTSetup fftSetup;
    aubio_DSPSplitComplex spec;
    smpl_t *in, *out;
+
+#elif defined HAVE_INTEL_IPP  // using Intel IPP
+  smpl_t *in, *out;
+  Ipp8u* memSpec;
+  Ipp8u* memInit;
+  Ipp8u* memBuffer;
+  struct aubio_FFTSpec* fftSpec;
+  aubio_IppComplex* complexOut;
  #else                         // using OOURA
    smpl_t *in, *out;
    smpl_t *w;
    int *ip;
-#endif /* HAVE_ACCELERATE */
-#endif /* HAVE_FFTW3 */
+#endif /* using OOURA */
+
    fvec_t * compspec;
  };
  
@@ -147,6 +179,7 @@ aubio_fft_t * new_aubio_fft (uint_t winsize) {
      AUBIO_ERR("fft: got winsize %d, but can not be < 2\n", winsize);
      goto beach;
    }
+
  #ifdef HAVE_FFTW3
    uint_t i;
    s->winsize  = winsize;
@@ -175,17 +208,55 @@ aubio_fft_t * new_aubio_fft (uint_t winsize) {
    for (i = 0; i < s->fft_size; i++) {
      s->specdata[i] = 0.;
    }
-#else
-#ifdef HAVE_ACCELERATE        // using ACCELERATE
+
+#elif defined HAVE_ACCELERATE  // using ACCELERATE
    s->winsize = winsize;
    s->fft_size = winsize;
    s->compspec = new_fvec(winsize);
-  s->log2fftsize = (uint_t)log2f(s->fft_size);
+  s->log2fftsize = aubio_power_of_two_order(s->fft_size);
    s->in = AUBIO_ARRAY(smpl_t, s->fft_size);
    s->out = AUBIO_ARRAY(smpl_t, s->fft_size);
    s->spec.realp = AUBIO_ARRAY(smpl_t, s->fft_size/2);
    s->spec.imagp = AUBIO_ARRAY(smpl_t, s->fft_size/2);
    s->fftSetup = aubio_vDSP_create_fftsetup(s->log2fftsize, FFT_RADIX2);
+
+#elif defined HAVE_INTEL_IPP  // using Intel IPP
+  const IppHintAlgorithm qualityHint = ippAlgHintAccurate; // OR ippAlgHintFast;
+  const int flags = IPP_FFT_NODIV_BY_ANY; // we're scaling manually afterwards
+  int order = aubio_power_of_two_order(winsize);
+  int sizeSpec, sizeInit, sizeBuffer;
+  IppStatus status;
+
+  if (winsize <= 4 || aubio_is_power_of_two(winsize) != 1)
+  {
+    AUBIO_ERR("intel IPP fft: can only create with sizes > 4 and power of two, requested %d,"
+      " try recompiling aubio with --enable-fftw3\n", winsize);
+    goto beach;
+  }
+
+  status = aubio_ippsFFTGetSize_R(order, flags, qualityHint,
+      &sizeSpec, &sizeInit, &sizeBuffer);
+  if (status != ippStsNoErr) {
+    AUBIO_ERR("fft: failed to initialize fft. IPP error: %d\n", status);
+    goto beach;
+  }
+  s->fft_size = s->winsize = winsize;
+  s->compspec = new_fvec(winsize);
+  s->in = AUBIO_ARRAY(smpl_t, s->winsize);
+  s->out = AUBIO_ARRAY(smpl_t, s->winsize);
+  s->memSpec = ippsMalloc_8u(sizeSpec);
+  s->memBuffer = ippsMalloc_8u(sizeBuffer);
+  if (sizeInit > 0 ) {
+    s->memInit = ippsMalloc_8u(sizeInit);
+  }
+  s->complexOut = aubio_ippsMalloc_complex(s->fft_size / 2 + 1);
+  status = aubio_ippsFFTInit_R(
+    &s->fftSpec, order, flags, qualityHint, s->memSpec, s->memInit);
+  if (status != ippStsNoErr) {
+    AUBIO_ERR("fft: failed to initialize. IPP error: %d\n", status);
+    goto beach;
+  }
+
  #else                         // using OOURA
    if (aubio_is_power_of_two(winsize) != 1) {
      AUBIO_ERR("fft: can only create with sizes power of two, requested %d,"
@@ -200,9 +271,10 @@ aubio_fft_t * new_aubio_fft (uint_t winsize) {
    s->ip    = AUBIO_ARRAY(int   , s->fft_size);
    s->w     = AUBIO_ARRAY(smpl_t, s->fft_size);
    s->ip[0] = 0;
-#endif /* HAVE_ACCELERATE */
-#endif /* HAVE_FFTW3 */
+#endif /* using OOURA */
+
    return s;
+
  beach:
    AUBIO_FREE(s);
    return NULL;
@@ -210,25 +282,32 @@ beach:
  
  void del_aubio_fft(aubio_fft_t * s) {
    /* destroy data */
-  del_fvec(s->compspec);
  #ifdef HAVE_FFTW3             // using FFTW3
    pthread_mutex_lock(&aubio_fftw_mutex);
    fftw_destroy_plan(s->pfw);
    fftw_destroy_plan(s->pbw);
    fftw_free(s->specdata);
    pthread_mutex_unlock(&aubio_fftw_mutex);
-#else /* HAVE_FFTW3 */
-#ifdef HAVE_ACCELERATE        // using ACCELERATE
+
+#elif defined HAVE_ACCELERATE // using ACCELERATE
    AUBIO_FREE(s->spec.realp);
    AUBIO_FREE(s->spec.imagp);
    aubio_vDSP_destroy_fftsetup(s->fftSetup);
+
+#elif defined HAVE_INTEL_IPP  // using Intel IPP
+  ippFree(s->memSpec);
+  ippFree(s->memInit);
+  ippFree(s->memBuffer);
+  ippFree(s->complexOut);
+
  #else                         // using OOURA
    AUBIO_FREE(s->w);
    AUBIO_FREE(s->ip);
-#endif /* HAVE_ACCELERATE */
-#endif /* HAVE_FFTW3 */
-  AUBIO_FREE(s->out);
+#endif
+
+  del_fvec(s->compspec);
    AUBIO_FREE(s->in);
+  AUBIO_FREE(s->out);
    AUBIO_FREE(s);
  }
  
@@ -251,6 +330,7 @@ void aubio_fft_do_complex(aubio_fft_t * s, const fvec_t * input, fvec_t * compsp
  #else
    memcpy(s->in, input->data, s->winsize * sizeof(smpl_t));
  #endif /* HAVE_MEMCPY_HACKS */
+
  #ifdef HAVE_FFTW3             // using FFTW3
    fftw_execute(s->pfw);
  #ifdef HAVE_COMPLEX_H
@@ -265,8 +345,8 @@ void aubio_fft_do_complex(aubio_fft_t * s, const fvec_t * input, fvec_t * compsp
      compspec->data[i] = s->specdata[i];
    }
  #endif /* HAVE_COMPLEX_H */
-#else /* HAVE_FFTW3 */
-#ifdef HAVE_ACCELERATE        // using ACCELERATE
+
+#elif defined HAVE_ACCELERATE // using ACCELERATE
    // convert real data to even/odd format used in vDSP
    aubio_vDSP_ctoz((aubio_DSPComplex*)s->in, 2, &s->spec, 1, s->fft_size/2);
    // compute the FFT
@@ -281,6 +361,19 @@ void aubio_fft_do_complex(aubio_fft_t * s, const fvec_t * input, fvec_t * compsp
    // apply scaling
    smpl_t scale = 1./2.;
    aubio_vDSP_vsmul(compspec->data, 1, &scale, compspec->data, 1, s->fft_size);
+
+#elif defined HAVE_INTEL_IPP  // using Intel IPP
+
+  // apply fft
+  aubio_ippsFFTFwd_RToCCS(s->in, (aubio_IppFloat*)s->complexOut, s->fftSpec, s->memBuffer);
+  // convert complex buffer to [ r0, r1, ..., rN, iN-1, .., i2, i1]
+  compspec->data[0] = s->complexOut[0].re;
+  compspec->data[s->fft_size / 2] = s->complexOut[s->fft_size / 2].re;
+  for (i = 1; i < s->fft_size / 2; i++) {
+    compspec->data[i] = s->complexOut[i].re;
+    compspec->data[s->fft_size - i] = s->complexOut[i].im;
+  }
+
  #else                         // using OOURA
    aubio_ooura_rdft(s->winsize, 1, s->in, s->ip, s->w);
    compspec->data[0] = s->in[0];
@@ -289,8 +382,7 @@ void aubio_fft_do_complex(aubio_fft_t * s, const fvec_t * input, fvec_t * compsp
      compspec->data[i] = s->in[2 * i];
      compspec->data[s->winsize - i] = - s->in[2 * i + 1];
    }
-#endif /* HAVE_ACCELERATE */
-#endif /* HAVE_FFTW3 */
+#endif /* using OOURA */
  }
  
  void aubio_fft_rdo_complex(aubio_fft_t * s, const fvec_t * compspec, fvec_t * output) {
@@ -313,8 +405,8 @@ void aubio_fft_rdo_complex(aubio_fft_t * s, const fvec_t * compspec, fvec_t * ou
    for (i = 0; i < output->length; i++) {
      output->data[i] = s->out[i]*renorm;
    }
-#else /* HAVE_FFTW3 */
-#ifdef HAVE_ACCELERATE        // using ACCELERATE
+
+#elif defined HAVE_ACCELERATE // using ACCELERATE
    // convert from real imag  [ r0, r1, ..., rN, iN-1, .., i2, i1]
    // to vDSP packed format   [ r0, rN, r1, i1, ..., rN-1, iN-1 ]
    s->out[0] = compspec->data[0];
@@ -332,6 +424,23 @@ void aubio_fft_rdo_complex(aubio_fft_t * s, const fvec_t * compspec, fvec_t * ou
    // apply scaling
    smpl_t scale = 1.0 / s->winsize;
    aubio_vDSP_vsmul(output->data, 1, &scale, output->data, 1, s->fft_size);
+
+#elif defined HAVE_INTEL_IPP  // using Intel IPP
+
+  // convert from real imag  [ r0, 0, ..., rN, iN-1, .., i2, i1, iN-1] to complex format
+  s->complexOut[0].re = compspec->data[0];
+  s->complexOut[0].im = 0;
+  s->complexOut[s->fft_size / 2].re = compspec->data[s->fft_size / 2];
+  s->complexOut[s->fft_size / 2].im = 0.0;
+  for (i = 1; i < s->fft_size / 2; i++) {
+    s->complexOut[i].re = compspec->data[i];
+    s->complexOut[i].im = compspec->data[s->fft_size - i];
+  }
+  // apply fft
+  aubio_ippsFFTInv_CCSToR((const aubio_IppFloat *)s->complexOut, output->data, s->fftSpec, s->memBuffer);
+  // apply scaling
+  aubio_ippsMulC(output->data, 1.0 / s->winsize, output->data, s->fft_size);
+
  #else                         // using OOURA
    smpl_t scale = 2.0 / s->winsize;
    s->out[0] = compspec->data[0];
@@ -344,8 +453,7 @@ void aubio_fft_rdo_complex(aubio_fft_t * s, const fvec_t * compspec, fvec_t * ou
    for (i=0; i < s->winsize; i++) {
      output->data[i] = s->out[i] * scale;
    }
-#endif /* HAVE_ACCELERATE */
-#endif /* HAVE_FFTW3 */
+#endif
  }
  
  void aubio_fft_get_spectrum(const fvec_t * compspec, cvec_t * spectrum) {
@@ -365,10 +473,26 @@ void aubio_fft_get_phas(const fvec_t * compspec, cvec_t * spectrum) {
    } else {
      spectrum->phas[0] = 0.;
    }
+#if defined(HAVE_INTEL_IPP)
+  // convert from real imag  [ r0, r1, ..., rN, iN-1, ..., i2, i1, i0]
+  //                     to  [ r0, r1, ..., rN, i0, i1, i2, ..., iN-1]
+  for (i = 1; i < spectrum->length / 2; i++) {
+    ELEM_SWAP(compspec->data[compspec->length - i],
+        compspec->data[spectrum->length + i - 1]);
+  }
+  aubio_ippsAtan2(compspec->data + spectrum->length,
+      compspec->data + 1, spectrum->phas + 1, spectrum->length - 1);
+  // revert the imaginary part back again
+  for (i = 1; i < spectrum->length / 2; i++) {
+    ELEM_SWAP(compspec->data[spectrum->length + i - 1],
+        compspec->data[compspec->length - i]);
+  }
+#else
    for (i=1; i < spectrum->length - 1; i++) {
      spectrum->phas[i] = ATAN2(compspec->data[compspec->length-i],
          compspec->data[i]);
    }
+#endif
    if (compspec->data[compspec->length/2] < 0) {
      spectrum->phas[spectrum->length - 1] = PI;
    } else {
diff --git a/src/wscript_build b/src/wscript_build

index 124a93f..aa85d95 100644 (file)
--- a/src/wscript_build
+++ b/src/wscript_build
@@ -3,6 +3,7 @@
  uselib = []
  uselib += ['M']
  uselib += ['FFTW3', 'FFTW3F']
+uselib += ['INTEL_IPP']
  uselib += ['SAMPLERATE']
  uselib += ['SNDFILE']
  uselib += ['AVCODEC']
diff --git a/wscript b/wscript

index 21498b6..35f582a 100644 (file)
--- a/wscript
+++ b/wscript
@@ -50,6 +50,9 @@ def options(ctx):
      add_option_enable_disable(ctx, 'fftw3', default = False,
              help_str = 'compile with fftw3 instead of ooura',
              help_disable_str = 'do not compile with fftw3')
+    add_option_enable_disable(ctx, 'intelipp', default = False,
+            help_str = 'use Intel IPP libraries (auto)',
+            help_disable_str = 'do not use Intel IPP libraries')
      add_option_enable_disable(ctx, 'complex', default = False,
              help_str ='compile with C99 complex',
              help_disable_str = 'do not use C99 complex (default)' )
@@ -155,6 +158,10 @@ def configure(ctx):
          ctx.env.LINKFLAGS += ['/DEBUG', '/INCREMENTAL:NO']
          # configure warnings
          ctx.env.CFLAGS += ['/W4', '/D_CRT_SECURE_NO_WARNINGS']
+        # ignore "possible loss of data" warnings
+        ctx.env.CFLAGS += ['/wd4305', '/wd4244', '/wd4245', '/wd4267']
+        # ignore "unreferenced formal parameter" warnings
+        ctx.env.CFLAGS += ['/wd4100']
          # set optimization level and runtime libs
          if (ctx.options.build_type == "release"):
              ctx.env.CFLAGS += ['/Ox']
@@ -244,7 +251,9 @@ def configure(ctx):
          ctx.env.cstlib_PATTERN = '%s.a'
  
          # tell emscripten functions we want to expose
-        from python.lib.gen_external import get_c_declarations, get_cpp_objects_from_c_declarations, get_all_func_names_from_lib, generate_lib_from_c_declarations
+        from python.lib.gen_external import get_c_declarations, \
+                get_cpp_objects_from_c_declarations, get_all_func_names_from_lib, \
+                generate_lib_from_c_declarations
          c_decls = get_c_declarations(usedouble=False)  # emscripten can't use double
          objects = list(get_cpp_objects_from_c_declarations(c_decls))
          # ensure that aubio structs are exported
@@ -283,6 +292,21 @@ def configure(ctx):
      else:
          ctx.msg('Checking if complex.h is enabled', 'no')
  
+    # check for Intel IPP
+    if (ctx.options.enable_intelipp != False):
+        has_ipp_headers = ctx.check(header_name=['ippcore.h', 'ippvm.h', 'ipps.h'],
+                mandatory = False)
+        has_ipp_libs = ctx.check(lib=['ippcore', 'ippvm', 'ipps'],
+                uselib_store='INTEL_IPP', mandatory = False)
+        if (has_ipp_headers and has_ipp_libs):
+            ctx.msg('Checking if Intel IPP is available', 'yes')
+            ctx.define('HAVE_INTEL_IPP', 1)
+            if ctx.env.CC_NAME == 'msvc':
+                # force linking multi-threaded static IPP libraries on Windows with msvc
+                ctx.define('_IPP_SEQUENTIAL_STATIC', 1)
+        else:
+            ctx.msg('Checking if Intel IPP is available', 'no')
+
      # check for fftw3
      if (ctx.options.enable_fftw3 != False or ctx.options.enable_fftw3f != False):
          # one of fftwf or fftw3f
@@ -306,13 +330,15 @@ def configure(ctx):
                          mandatory = ctx.options.enable_fftw3)
          ctx.define('HAVE_FFTW3', 1)
  
-    # fftw not enabled, use vDSP or ooura
+    # fftw not enabled, use vDSP, intelIPP or ooura
      if 'HAVE_FFTW3F' in ctx.env.define_key:
          ctx.msg('Checking for FFT implementation', 'fftw3f')
      elif 'HAVE_FFTW3' in ctx.env.define_key:
          ctx.msg('Checking for FFT implementation', 'fftw3')
      elif 'HAVE_ACCELERATE' in ctx.env.define_key:
          ctx.msg('Checking for FFT implementation', 'vDSP')
+    elif 'HAVE_INTEL_IPP' in ctx.env.define_key:
+        ctx.msg('Checking for FFT implementation', 'Intel IPP')
      else:
          ctx.msg('Checking for FFT implementation', 'ooura')
author	Paul Brossier <piem@piem.org>
	Sun, 1 Oct 2017 18:04:30 +0000 (20:04 +0200)
committer	Paul Brossier <piem@piem.org>
	Sun, 1 Oct 2017 18:04:30 +0000 (20:04 +0200)
src/aubio_priv.h		patch \| blob \| history
src/cvec.c		patch \| blob \| history
src/fvec.c		patch \| blob \| history
src/mathutils.c		patch \| blob \| history
src/mathutils.h		patch \| blob \| history
src/spectral/fft.c		patch \| blob \| history
src/wscript_build		patch \| blob \| history
wscript		patch \| blob \| history