From a7348ca5826c3a21980793ed4412306e08ffdff8 Mon Sep 17 00:00:00 2001 From: Paul Brossier Date: Tue, 16 Feb 2016 21:47:46 +0100 Subject: [PATCH] src/fmat.c: add optimized fmat_vecmul --- src/fmat.c | 28 ++++++++++++++++++++++++++++ src/fmat.h | 9 +++++++++ 2 files changed, 37 insertions(+) diff --git a/src/fmat.c b/src/fmat.c index a51c18a9..8dd3bdc8 100644 --- a/src/fmat.c +++ b/src/fmat.c @@ -154,3 +154,31 @@ void fmat_copy(fmat_t *s, fmat_t *t) { #endif } +void fmat_vecmul(fmat_t *s, fvec_t *scale, fvec_t *output) { + uint_t k; + assert(s->height == output->length); + assert(s->length == scale->length); +#if !defined(HAVE_ACCELERATE) && !defined(HAVE_ATLAS) + uint_t j; + fvec_zeros(output); + for (j = 0; j < s->length; j++) { + for (k = 0; k < s->height; k++) { + output->data[k] += scale->data[j] + * s->data[k][j]; + } + } +#elif defined(HAVE_ATLAS) + for (k = 0; k < s->height; k++) { + output->data[k] = aubio_cblas_dot( s->length, scale->data, 1, s->data[k], 1); + } +#elif defined(HAVE_ACCELERATE) +#if 0 + // seems slower and less precise (and dangerous?) + vDSP_mmul (s->data[0], 1, scale->data, 1, output->data, 1, s->height, 1, s->length); +#else + for (k = 0; k < s->height; k++) { + aubio_vDSP_dotpr( scale->data, 1, s->data[k], 1, &(output->data[k]), s->length); + } +#endif +#endif +} diff --git a/src/fmat.h b/src/fmat.h index f844832b..df2b835d 100644 --- a/src/fmat.h +++ b/src/fmat.h @@ -156,6 +156,15 @@ void fmat_weight(fmat_t *s, fmat_t *weight); */ void fmat_copy(fmat_t *s, fmat_t *t); +/** compute the product of a matrix by a vector + + \param s matrix to compute product with + \param scale vector to compute product with, must have the same length as `s` + \param output vector to store results in, its length must equal the height of `s` + +*/ +void fmat_vecmul(fmat_t *s, fvec_t *scale, fvec_t *output); + #ifdef __cplusplus } #endif -- 2.11.0