下面代码参考TI的实现:
/* NAME */
/* DSPF_dp_mat_mul_cplx -- Complex matrix multiplication */
/* */
/* USAGE */
/* */
/* This routine has the following C prototype: */
/* */
/* void DSPF_dp_mat_mul_cplx( */
/* const double* x, */
/* int r1, */
/* int c1, */
/* const double* y, */
/* int c2, */
/* double* restrict r */
/* ) */
/* */
/* x[2*r1*c1]: Input matrix containing r1*c1 complex */
/* floating point numbers having r1 rows and c1 */
/* columns of complex numbers. */
/* r1 : No. of rows in matrix x. */
/* c1 : No. of columns in matrix x. */
/* Also no. of rows in matrix y. */
/* y[2*c1*c2]: Input matrix containing c1*c2 complex */
/* floating point numbers having c1 rows and c2 */
/* columns of complex numbers. */
/* c2 : No. of columns in matrix y. */
/* r[2*r1*c2]: Output matrix of c1*c2 complex floating */
/* point numbers having c1 rows and c2 columns of */
/* complex numbers. */
/* */
/* Complex numbers are stored consecutively with */
/* real values stored in even positions and */
/* imaginary values in odd positions. */
/* */
/* DESCRIPTION */
/* */
/* This function computes the expression "r = x * y" for the */
/* matrices x and y. The columnar dimension of x must match the row */
/* dimension of y. The resulting matrix has the same number of rows */
/* as x and the same number of columns as y. */
/* */
/* Each element of the matrix is assumed to be complex numbers with */
/* Real values are stored in even positions and imaginary */
/* values in odd positions. */
/* */
/* TECHNIQUES */
/* */
/* 1. Innermost loop is unrolled twice. */
/* 2. Outermost loop is executed in parallel with innner loops. */
/* */
/* ASSUMPTIONS */
/* */
/* 1. r1,r2>=1,c1 should be a multiple of 2 and >=2. */
/* 2. x should be padded with 6 words */
/* */
/* */
/* C CODE */
/* */
void DSPF_dp_mat_mul_cplx(const double* x, int r1, int c1,
const double* y, int c2, double* r)
{
double real, imag;
int i, j, k;
for(i = 0; i < r1; i++)
{
for(j = 0; j < c2; j++)
{
real=0;
imag=0;
for(k = 0; k < c1; k++)
{
real += (x[i*2*c1 + 2*k]*y[k*2*c2 + 2*j]
-x[i*2*c1 + 2*k + 1] * y[k*2*c2 + 2*j + 1]);
imag+=(x[i*2*c1 + 2*k] * y[k*2*c2 + 2*j + 1]
+ x[i*2*c1 + 2*k + 1] * y[k*2*c2 + 2*j]);
}
r[i*2*c2 + 2*j] = real;
r[i*2*c2 + 2*j + 1] = imag;
}
}
}
/* NOTES */
/* */
/* 1. Real values are stored in even word positions and imaginary */
/* values in odd positions. */
/* 2. Endian: This code is LITTLE ENDIAN. */
/* 3. Interruptibility: This code is interrupt tolerant but not */
/* interruptible. */
/* */
/* CYCLES */
/* */
/* 8*r1*c1*c2'+ 18*(r1*c2)+40 WHERE c2'=2*ceil(c2/2) */
/* When r1=3, c1=4, c2=4, cycles = 640 */
/* When r1=4, c1=4, c2=5, cycles = 1040 */
/* */
/* CODESIZE */
/* */
/* 832 bytes */
/* ------------------------------------------------------------------------ */
/* Copyright (c) 2004 Texas Instruments, Incorporated. */
/* All Rights Reserved. */
/* ======================================================================== */
#ifndef DSPF_DP_MAT_MUL_CPLX_H_
#define DSPF_DP_MAT_MUL_CPLX_H_ 1
void DSPF_dp_mat_mul_cplx(
const double* x,
int r1,
int c1,
const double* y,
int c2,
double* restrict r
);
#endif
/* ======================================================================== */
/* End of file: DSPF_dp_mat_mul_cplx.h */
/* ------------------------------------------------------------------------ */
测试程序如下:
(说明:下面代码分别包含了三个矩阵相乘的函数)
#include <stdio.h>
#include <stdlib.h> #include "complex.h" void matrix_mul( const double* x, int r1, int c1, const double* y, int c2, double* r ) { int i,j,k; for(i=0;i<r1;++i) { for(j=0;j<c2;++j) { for(k=0;k<c1;++k) { //r[i*r1 + j] =r[i*r1 + j] + x[i*r1 + k] * y[k*c1 + j]; r[i*c2 + j] =r[i*c2 + j] + x[i*c1 + k] * y[k*c2 + j]; } } } } void matrix_mul_cplx( const Complex* x, int r1, int c1, const Complex* y, int c2, Complex* r ) { int i,j,k; Complex temp; temp.real = 0; temp.image = 0; for(i=0;i<r1;++i) { for(j=0;j<c2;++j) { for(k=0;k<c1;++k) { //r[i*r1 + j] =r[i*r1 + j] + x[i*r1 + k] * y[k*c1 + j]; temp = complex_mul(x[i*c1 + k], y[k*c2 + j]); r[i*c2 + j] =complex_add(r[i*c2 + j], temp); } } } } void DSPF_dp_mat_mul_cplx(const double* x, int r1, int c1, const double* y, int c2, double* r) { double real, imag; int i, j, k; for(i = 0; i < r1; i++) { for(j = 0; j < c2; j++) { real=0; imag=0; for(k = 0; k < c1; k++) { real += (x[i*2*c1 + 2*k]*y[k*2*c2 + 2*j] -x[i*2*c1 + 2*k + 1] * y[k*2*c2 + 2*j + 1]); imag+=(x[i*2*c1 + 2*k] * y[k*2*c2 + 2*j + 1] + x[i*2*c1 + 2*k + 1] * y[k*2*c2 + 2*j]); } r[i*2*c2 + 2*j] = real; r[i*2*c2 + 2*j + 1] = imag; } } } int main() { //matrix_mul test /* double x[9] = {1,2,3,4,5,6,7,8,9}; //矩阵按行存放 int r1 = 3, c1 = 3,c2 = 1; double y[3] = {10,11,12}; double r[3] = {0}; int i,j; //printf("r : %lf\n%lf\n",r[0],r[1]); printf("r = : \n"); for(i=0;i<r1;++i) { for(j=0;j<c2;++j) { //printf("%lf\t",r[i*r1 + j]); printf("%lf\t",r[i*c2 + j]); } printf("\n"); } matrix_mul(x,r1,c1,y,c2,r); printf("x*y = : \n"); for(i=0;i<r1;++i) { for(j=0;j<c2;++j) { printf("%lf\t",r[i*c2 + j]); } printf("\n"); } */ /* //matrix_mul_cplx test Complex x[4] = {{1,2}, {3,4}, {5,6}, {7,8}}; //矩阵按行存放 int r1 = 2, c1 = 2,c2 = 1; Complex y[2] = {{1,2}, {3,4}}; Complex r[2] = {{0,0}, {0,0}}; int i,j; //printf("r : %lf\n%lf\n",r[0],r[1]); printf("r = : \n"); for(i=0;i<r1;++i) { for(j=0;j<c2;++j) { printf("%lf + j*%lf\t",r[i*c2 + j].real , r[i*c2 + j].image); } printf("\n"); } matrix_mul_cplx(x,r1,c1,y,c2,r); printf("x*y = : \n"); for(i=0;i<r1;++i) { for(j=0;j<c2;++j) { printf("%lf + j*%lf\t",r[i*c2 + j].real , r[i*c2 + j].image); } printf("\n"); } */ //DSPF_dp_mat_mul_cplx test Complex x[4] = {{1,2}, {3,4}, {5,6}, {7,8}}; //矩阵按行存放 int r1 = 2, c1 = 2,c2 = 1; Complex y[2] = {{1,2}, {3,4}}; Complex r[2] = {{0,0}, {0,0}}; int i,j; //printf("r : %lf\n%lf\n",r[0],r[1]); printf("r = : \n"); for(i=0;i<r1;++i) { for(j=0;j<c2;++j) { printf("%lf + j*%lf\t",r[i*c2 + j].real , r[i*c2 + j].image); } printf("\n"); } DSPF_dp_mat_mul_cplx((double*)x, r1,c1, (double*)y, c2, (double*)r); printf("x*y = : \n"); for(i=0;i<r1;++i) { for(j=0;j<c2;++j) { printf("%lf + j*%lf\t",r[i*c2 + j].real , r[i*c2 + j].image); } printf("\n"); } return 0; }