c# - Would using the Parallel class improve performance when copying scanline from bitmap to bitmap? -
I need to copy scanlines from one byte* buffer to another byte* buffer with different strides.
To do so, I'm using RtlMoveMemory:
[dllimport("kernel32.dll", entrypoint = "rtlmovememory", setlasterror = false)] private static unsafe extern void movememory(void* dest, void* src, int size); The normal way would be something like this:
for (int y = 0; y < height; y++ ) { movememory(dst_ptr, src_ptr, stride); src_ptr += src_stride; dst_ptr += dst_stride; } My question: would it be faster using the Parallel class?
parallel.for(0, height, (y) => { byte* src_ptr = src_base + y * src_stride; byte* dst_ptr = dst_base + y * dst_stride; movememory(dst_ptr, src_ptr, line_width); }); Or would it impact performance negatively?
Actually, testing shows that copying line by line in parallel can be faster. For example, I created an array representing a 1024 x 768 bitmap, and the parallel version was 45% faster.
The parallel version is faster when the scan lines are longer. Below 1 kilobyte, the single-threaded version is faster.
Tested with .NET 4.5 and Visual Studio 2013, in 64-bit mode. Compiled in Release mode and run without the debugger attached.
// Benchmark buffer layout: numlines scan lines of linelength bytes each.
// (768 * 3 presumably means 768 pixels at 3 bytes per pixel - confirm against the intended bitmap format.)
private const int numlines = 1024;
private const int linelength = 768 * 3;
private const int arraysize = numlines * linelength;

// P/Invoke binding for the native RtlMoveMemory routine in kernel32.dll.
// The entry point must be spelled with the exact exported casing, otherwise the lookup fails at call time.
// The native length parameter is SIZE_T; int is kept here so existing call sites keep compiling unchanged.
[DllImport("kernel32.dll", EntryPoint = "RtlMoveMemory", SetLastError = false)]
private static unsafe extern void movememory(void* dest, void* src, int size);

/// <summary>
/// Times a single-threaded and a Parallel.For line-by-line copy of the same
/// buffer and writes both tick counts, their difference and their ratio to the console.
/// </summary>
public unsafe void test()
{
    // Initialize a big array to test the copy with. The narrowing cast is meant to
    // wrap around every 256 values, so make it explicitly unchecked.
    var source = Enumerable.Range(0, arraysize).Select(x => unchecked((byte)x)).ToArray();
    var dest = new byte[arraysize];

    // Pin both arrays so the raw pointers stay valid for the whole benchmark.
    fixed (byte* psource = source, pdest = dest)
    {
        // Test single threaded.
        // Run once first so JIT compilation is not included in the timing.
        Console.WriteLine("testing single threaded...");
        movesinglethread(psource, pdest, numlines, linelength);

        // Okay, now time it.
        var sw = Stopwatch.StartNew();
        movesinglethread(psource, pdest, numlines, linelength);
        sw.Stop();
        Console.WriteLine("single threaded: {0:n0} ticks", sw.ElapsedTicks);
        var singleticks = sw.ElapsedTicks;

        Console.WriteLine("testing parallel");
        // Run once first so JIT compilation is not included in the timing.
        moveparallel(psource, pdest, numlines, linelength);

        sw = Stopwatch.StartNew();
        moveparallel(psource, pdest, numlines, linelength);
        sw.Stop();
        Console.WriteLine("parallel: {0:n0} ticks", sw.ElapsedTicks);

        // diff is parallel minus single-threaded ticks; pct is the parallel/single ratio.
        var diff = sw.ElapsedTicks - singleticks;
        var pct = (double)sw.ElapsedTicks / singleticks;
        Console.WriteLine("difference: {0:n0} ticks {1:p2}", diff, pct);
    }
}

/// <summary>
/// Copies <paramref name="nlines"/> consecutive lines of <paramref name="linelength"/>
/// bytes from <paramref name="source"/> to <paramref name="dest"/>, one line per call,
/// on the calling thread.
/// </summary>
private unsafe void movesinglethread(byte* source, byte* dest, int nlines, int linelength)
{
    var srcptr = source;
    var dstptr = dest;
    for (int y = 0; y < nlines; ++y)
    {
        movememory(dstptr, srcptr, linelength);
        srcptr += linelength;
        dstptr += linelength;
    }
}

/// <summary>
/// Copies the same lines as <c>movesinglethread</c>, but issues one copy per line
/// through Parallel.For so lines may be copied concurrently.
/// </summary>
private unsafe void moveparallel(byte* source, byte* dest, int nlines, int linelength)
{
    Parallel.For(0, nlines, y =>
    {
        // Compute the byte offset in 64-bit arithmetic so it cannot overflow int on large buffers.
        long offset = (long)y * linelength;
        movememory(dest + offset, source + offset, linelength);
    });
}
Comments
Post a Comment