上一篇快速高斯模糊的原作者还有另一个比较快速的模糊算法 Stack Blur,字面意思为"堆栈模糊"。
源地址为:http://incubator.quasimondo.com/processing/fast_blur_deluxe.php
这个算法在多个平台上都有实现,例如 Android、iOS 以及 JavaScript 等。
processing源码:http://incubator.quasimondo.com/processing/stackblur.pde
效果图:
转为C语言实现版本。
代码如下:
// Stack Blur v1.0
//
// Author: Mario Klingemann <mario@quasimondo.com>
// http://incubator.quasimondo.com
// created February 29, 2004
// C version updated and performance optimization by tntmonks(http://tntmonks.cnblogs.com)
//
// This is a compromise between Gaussian Blur and Box blur
// It creates much better looking blurs than Box Blur, but is
// 7x faster than my Gaussian Blur implementation.
//
// I called it Stack Blur because this describes best how this
// filter works internally: it creates a kind of moving stack
// of colors whilst scanning through the image. Thereby it
// just has to add one new block of color to the right side
// of the stack and remove the leftmost color. The remaining
// colors on the topmost layer of the stack are either added on
// or reduced by one, depending on if they are on the right or
// on the left side of the stack.
//
// If you are using this algorithm in your code please add
// the following line:
//
// Stack Blur Algorithm by Mario Klingemann <mario@quasimondo.com>

#include <limits.h> /* INT_MAX */
#include <stdlib.h> /* malloc, free, abs, size_t */

/* NOTE: both macros evaluate their arguments more than once; do not pass
 * expressions with side effects. */
#define MAX(x, y) ((x) > (y) ? (x) : (y))
#define MIN(x, y) ((x) > (y) ? (y) : (x))

/*
 * Blurs the first three channels of an interleaved 8-bit image in place.
 *
 * The blur is separable: a horizontal pass reads `pix` and writes three
 * temporary planes, then a vertical pass reads those planes and writes the
 * result back into `pix`. Samples outside the image are clamped to the
 * nearest edge pixel. Each output value is a weighted sum with triangular
 * weights 1, 2, ..., radius+1, ..., 2, 1 divided (truncating) by
 * (radius+1)^2.
 *
 * pix    - pixel buffer, w*h pixels of `comp` bytes each; only bytes 0..2 of
 *          every pixel are read and written, any further bytes are untouched.
 * w, h   - image width and height in pixels.
 * comp   - bytes per pixel; must be at least 3.
 * radius - blur radius in pixels; must be at least 1.
 *
 * The image is left unmodified when: pix is NULL, w or h is 0, comp < 3,
 * radius < 1, radius is so large that the weighted sums would overflow an
 * int, the buffer sizes would overflow size_t, or a scratch allocation
 * fails.
 */
void fastStackBlur(unsigned char* pix, unsigned int w, unsigned int h, unsigned int comp, int radius)
{
    if (pix == NULL || w == 0 || h == 0 || comp < 3 || radius < 1) {
        return;
    }

    /* Sum of the triangular weights: (radius + 1)^2. The largest running sum
     * is 255 * weightSum, and the lookup table has 256 * weightSum entries,
     * so both must fit in an int. */
    const long long wideWeightSum = ((long long)radius + 1) * ((long long)radius + 1);
    if (wideWeightSum > INT_MAX / 256) {
        return;
    }
    const int weightSum = (int)wideWeightSum;
    const int r1 = radius + 1;
    const size_t rad = (size_t)radius;
    const size_t window = 2 * rad + 1; /* number of entries in the stack */

    const size_t sizeMax = (size_t)-1;
    const size_t width = w;
    const size_t height = h;
    if (width > sizeMax / 3 / height || MAX(width, height) > sizeMax / sizeof(size_t)) {
        return;
    }
    const size_t pixelCount = width * height;
    const size_t lastCol = width - 1;
    const size_t lastRow = height - 1;
    const size_t tableLen = (size_t)256 * (size_t)weightSum;

    unsigned char *planes = malloc(pixelCount * 3);           /* R, G, B planes of the horizontal pass */
    size_t *vmin = malloc(MAX(width, height) * sizeof *vmin); /* cached clamped "incoming" positions */
    unsigned char *dv = malloc(tableLen);                     /* dv[s] == s / weightSum */
    int (*stack)[3] = malloc(window * sizeof *stack);         /* circular window of RGB samples */
    if (planes == NULL || vmin == NULL || dv == NULL || stack == NULL) {
        free(planes);
        free(vmin);
        free(dv);
        free(stack);
        return;
    }

    unsigned char *plane[3] = { planes, planes + pixelCount, planes + 2 * pixelCount };

    for (size_t s = 0; s < tableLen; s++) {
        dv[s] = (unsigned char)(s / (size_t)weightSum);
    }

    /* ---- Horizontal pass: pix -> planes ---- */
    for (size_t y = 0; y < height; y++) {
        const size_t rowBase = y * width;
        int sum[3] = { 0, 0, 0 };    /* weighted sum of the window */
        int inSum[3] = { 0, 0, 0 };  /* plain sum of the samples right of centre */
        int outSum[3] = { 0, 0, 0 }; /* plain sum of the centre and the samples left of it */

        /* Seed the window for x == 0, clamping columns to [0, lastCol]. */
        for (int i = -radius; i <= radius; i++) {
            const size_t col = (i > 0) ? MIN((size_t)i, lastCol) : 0;
            const unsigned char *src = pix + (rowBase + col) * comp;
            int *slot = stack[i + radius];
            const int weight = r1 - abs(i);
            for (int c = 0; c < 3; c++) {
                slot[c] = src[c];
                sum[c] += slot[c] * weight;
                if (i > 0) {
                    inSum[c] += slot[c];
                } else {
                    outSum[c] += slot[c];
                }
            }
        }

        size_t sp = rad; /* stack slot holding the current centre sample */
        for (size_t x = 0; x < width; x++) {
            const size_t idx = rowBase + x;
            if (y == 0) {
                /* Column entering the window; identical for every row. */
                vmin[x] = MIN(x + rad + 1, lastCol);
            }
            int *oldest = stack[(sp + window - rad) % window];
            const unsigned char *src = pix + (rowBase + vmin[x]) * comp;
            sp = (sp + 1) % window;
            int *next = stack[sp];

            for (int c = 0; c < 3; c++) {
                plane[c][idx] = dv[sum[c]];

                /* Drop the leftmost sample and reuse its slot for the new one. */
                sum[c] -= outSum[c];
                outSum[c] -= oldest[c];
                oldest[c] = src[c];
                inSum[c] += oldest[c];
                sum[c] += inSum[c];

                /* The new centre moves from the "in" half to the "out" half. */
                outSum[c] += next[c];
                inSum[c] -= next[c];
            }
        }
    }

    /* ---- Vertical pass: planes -> pix ---- */
    for (size_t x = 0; x < width; x++) {
        int sum[3] = { 0, 0, 0 };
        int inSum[3] = { 0, 0, 0 };
        int outSum[3] = { 0, 0, 0 };

        /* Seed the window for y == 0, clamping rows to [0, lastRow]. The row
         * is computed directly from the signed offset so that negative
         * offsets are never compared against an unsigned bound. */
        for (int i = -radius; i <= radius; i++) {
            const size_t srcRow = (i > 0) ? MIN((size_t)i, lastRow) : 0;
            const size_t idx = srcRow * width + x;
            int *slot = stack[i + radius];
            const int weight = r1 - abs(i);
            for (int c = 0; c < 3; c++) {
                slot[c] = plane[c][idx];
                sum[c] += slot[c] * weight;
                if (i > 0) {
                    inSum[c] += slot[c];
                } else {
                    outSum[c] += slot[c];
                }
            }
        }

        size_t sp = rad;
        size_t idx = x;
        for (size_t y = 0; y < height; y++) {
            if (x == 0) {
                /* Offset of the row entering the window; identical for every column. */
                vmin[y] = MIN(y + rad + 1, lastRow) * width;
            }
            const size_t srcIdx = x + vmin[y];
            unsigned char *dst = pix + idx * comp;
            int *oldest = stack[(sp + window - rad) % window];
            sp = (sp + 1) % window;
            int *next = stack[sp];

            for (int c = 0; c < 3; c++) {
                dst[c] = dv[sum[c]];

                sum[c] -= outSum[c];
                outSum[c] -= oldest[c];
                oldest[c] = plane[c][srcIdx];
                inSum[c] += oldest[c];
                sum[c] += inSum[c];

                outSum[c] += next[c];
                inSum[c] -= next[c];
            }
            idx += width;
        }
    }

    free(planes);
    free(vmin);
    free(dv);
    free(stack);
}
在博主的机器上测试一张 5000x3000 的图像,模糊半径为 10 的情况下,耗时 1s。