[ec2-user@ip-172-31-5-194 cudalucas]$ ./CUDALucas -threadbench 1 32768 50 1 CUDALucas v2.06beta 64-bit build, compiled Aug 9 2018 @ 23:25:35 binary compiled for CUDA 9.20 CUDA runtime version 9.20 CUDA driver version 9.20 ------- DEVICE 0 ------- name Tesla V100-SXM2-16GB UUID ECC Support? Enabled Compatibility 7.0 clockRate (MHz) 1530 memClockRate (MHz) 877 totalGlobalMem 16945512448 totalConstMem 65536 l2CacheSize 6291456 sharedMemPerBlock 49152 regsPerBlock 65536 warpSize 32 memPitch 2147483647 maxThreadsPerBlock 1024 maxThreadsPerMP 2048 multiProcessorCount 80 maxThreadsDim[3] 1024,1024,64 maxGridSize[3] 2147483647,65535,65535 textureAlignment 512 deviceOverlap 1 pciDeviceID 30 pciBusID 0 Thread bench, testing various thread sizes for ffts 1K to 32768K, doing 50 passes. fft = 1K, ave time = 0.0208 ms, square: 32, splice: 128 fft = 1K, ave time = 0.0209 ms, square: 64, splice: 128 fft = 1K, ave time = 0.0210 ms, square: 128, splice: 128 fft = 1K, ave time = 0.0191 ms, square: 256, splice: 128 fft = 1K, ave time = 0.0191 ms, square: 256, splice: 32 fft = 1K, ave time = 0.0191 ms, square: 256, splice: 64 fft = 1K, ave time = 0.0182 ms, square: 256, splice: 128 fft = 1K, ave time = 0.0181 ms, square: 256, splice: 256 fft = 1K, ave time = 0.0181 ms, square: 256, splice: 512 fft = 1K, ave time = 0.0181 ms, square: 256, splice: 1024 fft = 1K, min time = 0.0181 ms, square: 256, splice: 512 fft = 2K, ave time = 0.0239 ms, square: 32, splice: 128 fft = 2K, ave time = 0.0239 ms, square: 64, splice: 128 fft = 2K, ave time = 0.0239 ms, square: 128, splice: 128 fft = 2K, ave time = 0.0240 ms, square: 256, splice: 128 fft = 2K, ave time = 0.0240 ms, square: 512, splice: 128 fft = 2K, ave time = 0.0239 ms, square: 128, splice: 32 fft = 2K, ave time = 0.0238 ms, square: 128, splice: 64 fft = 2K, ave time = 0.0239 ms, square: 128, splice: 128 fft = 2K, ave time = 0.0239 ms, square: 128, splice: 256 fft = 2K, ave time = 0.0240 ms, square: 128, splice: 512 fft = 2K, ave time = 0.0239 ms, square: 128, splice: 1024 fft = 2K, min time = 0.0238 ms, square: 128, splice: 64 fft = 4K, ave time = 0.0239 ms, square: 32, splice: 128 fft = 4K, ave time = 0.0239 ms, square: 64, splice: 128 fft = 4K, ave time = 0.0238 ms, square: 128, splice: 128 fft = 4K, ave time = 0.0239 ms, square: 256, splice: 128 fft = 4K, ave time = 0.0238 ms, square: 512, splice: 128 fft = 4K, ave time = 0.0247 ms, square: 1024, splice: 128 fft = 4K, ave time = 0.0238 ms, square: 512, splice: 32 fft = 4K, ave time = 0.0239 ms, square: 512, splice: 64 fft = 4K, ave time = 0.0238 ms, square: 512, splice: 128 fft = 4K, ave time = 0.0239 ms, square: 512, splice: 256 fft = 4K, ave time = 0.0239 ms, square: 512, splice: 512 fft = 4K, ave time = 0.0240 ms, square: 512, splice: 1024 fft = 4K, min time = 0.0238 ms, square: 512, splice: 128 fft = 8K, ave time = 0.0343 ms, square: 32, splice: 128 fft = 8K, ave time = 0.0343 ms, square: 64, splice: 128 fft = 8K, ave time = 0.0343 ms, square: 128, splice: 128 fft = 8K, ave time = 0.0347 ms, square: 256, splice: 128 fft = 8K, ave time = 0.0354 ms, square: 512, splice: 128 fft = 8K, ave time = 0.0368 ms, square: 1024, splice: 128 fft = 8K, ave time = 0.0343 ms, square: 64, splice: 32 fft = 8K, ave time = 0.0343 ms, square: 64, splice: 64 fft = 8K, ave time = 0.0343 ms, square: 64, splice: 128 fft = 8K, ave time = 0.0343 ms, square: 64, splice: 256 fft = 8K, ave time = 0.0343 ms, square: 64, splice: 512 fft = 8K, ave time = 0.0343 ms, square: 64, splice: 1024 fft = 8K, min time = 0.0343 ms, square: 64, splice: 1024 fft = 16K, ave time = 0.0362 ms, square: 32, splice: 128 fft = 16K, ave time = 0.0361 ms, square: 64, splice: 128 fft = 16K, ave time = 0.0362 ms, square: 128, splice: 128 fft = 16K, ave time = 0.0364 ms, square: 256, splice: 128 fft = 16K, ave time = 0.0369 ms, square: 512, splice: 128 fft = 16K, ave time = 0.0384 ms, square: 1024, splice: 128 fft = 16K, ave time = 0.0361 ms, square: 64, splice: 32 fft = 16K, ave time = 0.0361 ms, square: 64, splice: 64 fft = 16K, ave time = 0.0361 ms, square: 64, splice: 128 fft = 16K, ave time = 0.0361 ms, square: 64, splice: 256 fft = 16K, ave time = 0.0361 ms, square: 64, splice: 512 fft = 16K, ave time = 0.0361 ms, square: 64, splice: 1024 fft = 16K, min time = 0.0361 ms, square: 64, splice: 512 fft = 32K, ave time = 0.0376 ms, square: 32, splice: 128 fft = 32K, ave time = 0.0376 ms, square: 64, splice: 128 fft = 32K, ave time = 0.0376 ms, square: 128, splice: 128 fft = 32K, ave time = 0.0377 ms, square: 256, splice: 128 fft = 32K, ave time = 0.0382 ms, square: 512, splice: 128 fft = 32K, ave time = 0.0398 ms, square: 1024, splice: 128 fft = 32K, ave time = 0.0376 ms, square: 64, splice: 32 fft = 32K, ave time = 0.0376 ms, square: 64, splice: 64 fft = 32K, ave time = 0.0376 ms, square: 64, splice: 128 fft = 32K, ave time = 0.0375 ms, square: 64, splice: 256 fft = 32K, ave time = 0.0376 ms, square: 64, splice: 512 fft = 32K, ave time = 0.0376 ms, square: 64, splice: 1024 fft = 32K, min time = 0.0375 ms, square: 64, splice: 256 fft = 50K, ave time = 0.0384 ms, square: 32, splice: 128 fft = 50K, ave time = 0.0384 ms, square: 64, splice: 128 fft = 50K, ave time = 0.0385 ms, square: 128, splice: 128 fft = 50K, ave time = 0.0386 ms, square: 256, splice: 128 fft = 50K, ave time = 0.0391 ms, square: 512, splice: 128 fft = 50K, ave time = 0.0382 ms, square: 32, splice: 32 fft = 50K, ave time = 0.0384 ms, square: 32, splice: 64 fft = 50K, ave time = 0.0384 ms, square: 32, splice: 128 fft = 50K, ave time = 0.0383 ms, square: 32, splice: 256 fft = 50K, ave time = 0.0383 ms, square: 32, splice: 512 fft = 50K, ave time = 0.0384 ms, square: 32, splice: 1024 fft = 50K, min time = 0.0382 ms, square: 32, splice: 32 fft = 54K, ave time = 0.0398 ms, square: 32, splice: 128 fft = 54K, ave time = 0.0399 ms, square: 64, splice: 128 fft = 54K, ave time = 0.0399 ms, square: 128, splice: 128 fft = 54K, ave time = 0.0400 ms, square: 256, splice: 128 fft = 54K, ave time = 0.0403 ms, square: 512, splice: 128 fft = 54K, ave time = 0.0397 ms, square: 32, splice: 32 fft = 54K, ave time = 0.0398 ms, square: 32, splice: 64 fft = 54K, ave time = 0.0398 ms, square: 32, splice: 128 fft = 54K, ave time = 0.0398 ms, square: 32, splice: 256 fft = 54K, ave time = 0.0398 ms, square: 32, splice: 512 fft = 54K, ave time = 0.0398 ms, square: 32, splice: 1024 fft = 54K, min time = 0.0397 ms, square: 32, splice: 32 fft = 64K, ave time = 0.0414 ms, square: 32, splice: 128 fft = 64K, ave time = 0.0416 ms, square: 64, splice: 128 fft = 64K, ave time = 0.0417 ms, square: 128, splice: 128 fft = 64K, ave time = 0.0415 ms, square: 256, splice: 128 fft = 64K, ave time = 0.0418 ms, square: 512, splice: 128 fft = 64K, ave time = 0.0433 ms, square: 1024, splice: 128 fft = 64K, ave time = 0.0413 ms, square: 32, splice: 32 fft = 64K, ave time = 0.0414 ms, square: 32, splice: 64 fft = 64K, ave time = 0.0414 ms, square: 32, splice: 128 fft = 64K, ave time = 0.0414 ms, square: 32, splice: 256 fft = 64K, ave time = 0.0414 ms, square: 32, splice: 512 fft = 64K, ave time = 0.0414 ms, square: 32, splice: 1024 fft = 64K, min time = 0.0413 ms, square: 32, splice: 32 fft = 98K, ave time = 0.0431 ms, square: 32, splice: 128 fft = 98K, ave time = 0.0430 ms, square: 64, splice: 128 fft = 98K, ave time = 0.0431 ms, square: 128, splice: 128 fft = 98K, ave time = 0.0433 ms, square: 256, splice: 128 fft = 98K, ave time = 0.0437 ms, square: 512, splice: 128 fft = 98K, ave time = 0.0429 ms, square: 64, splice: 32 fft = 98K, ave time = 0.0431 ms, square: 64, splice: 64 fft = 98K, ave time = 0.0430 ms, square: 64, splice: 128 fft = 98K, ave time = 0.0430 ms, square: 64, splice: 256 fft = 98K, ave time = 0.0430 ms, square: 64, splice: 512 fft = 98K, ave time = 0.0430 ms, square: 64, splice: 1024 fft = 98K, min time = 0.0429 ms, square: 64, splice: 32 fft = 100K, ave time = 0.0456 ms, square: 32, splice: 128 fft = 100K, ave time = 0.0454 ms, square: 64, splice: 128 fft = 100K, ave time = 0.0456 ms, square: 128, splice: 128 fft = 100K, ave time = 0.0457 ms, square: 256, splice: 128 fft = 100K, ave time = 0.0461 ms, square: 512, splice: 128 fft = 100K, ave time = 0.0469 ms, square: 1024, splice: 128 fft = 100K, ave time = 0.0453 ms, square: 64, splice: 32 fft = 100K, ave time = 0.0455 ms, square: 64, splice: 64 fft = 100K, ave time = 0.0454 ms, square: 64, splice: 128 fft = 100K, ave time = 0.0454 ms, square: 64, splice: 256 fft = 100K, ave time = 0.0454 ms, square: 64, splice: 512 fft = 100K, ave time = 0.0454 ms, square: 64, splice: 1024 fft = 100K, min time = 0.0453 ms, square: 64, splice: 32 fft = 128K, ave time = 0.0457 ms, square: 32, splice: 128 fft = 128K, ave time = 0.0458 ms, square: 64, splice: 128 fft = 128K, ave time = 0.0458 ms, square: 128, splice: 128 fft = 128K, ave time = 0.0460 ms, square: 256, splice: 128 fft = 128K, ave time = 0.0458 ms, square: 512, splice: 128 fft = 128K, ave time = 0.0468 ms, square: 1024, splice: 128 fft = 128K, ave time = 0.0454 ms, square: 32, splice: 32 fft = 128K, ave time = 0.0455 ms, square: 32, splice: 64 fft = 128K, ave time = 0.0457 ms, square: 32, splice: 128 fft = 128K, ave time = 0.0456 ms, square: 32, splice: 256 fft = 128K, ave time = 0.0456 ms, square: 32, splice: 512 fft = 128K, ave time = 0.0457 ms, square: 32, splice: 1024 fft = 128K, min time = 0.0454 ms, square: 32, splice: 32 fft = 144K, ave time = 0.0494 ms, square: 32, splice: 128 fft = 144K, ave time = 0.0494 ms, square: 64, splice: 128 fft = 144K, ave time = 0.0496 ms, square: 128, splice: 128 fft = 144K, ave time = 0.0494 ms, square: 256, splice: 128 fft = 144K, ave time = 0.0495 ms, square: 512, splice: 128 fft = 144K, ave time = 0.0502 ms, square: 1024, splice: 128 fft = 144K, ave time = 0.0491 ms, square: 32, splice: 32 fft = 144K, ave time = 0.0492 ms, square: 32, splice: 64 fft = 144K, ave time = 0.0494 ms, square: 32, splice: 128 fft = 144K, ave time = 0.0494 ms, square: 32, splice: 256 fft = 144K, ave time = 0.0494 ms, square: 32, splice: 512 fft = 144K, ave time = 0.0494 ms, square: 32, splice: 1024 fft = 144K, min time = 0.0491 ms, square: 32, splice: 32 fft = 162K, ave time = 0.0499 ms, square: 32, splice: 128 fft = 162K, ave time = 0.0498 ms, square: 64, splice: 128 fft = 162K, ave time = 0.0499 ms, square: 128, splice: 128 fft = 162K, ave time = 0.0501 ms, square: 256, splice: 128 fft = 162K, ave time = 0.0505 ms, square: 512, splice: 128 fft = 162K, ave time = 0.0495 ms, square: 64, splice: 32 fft = 162K, ave time = 0.0497 ms, square: 64, splice: 64 fft = 162K, ave time = 0.0498 ms, square: 64, splice: 128 fft = 162K, ave time = 0.0498 ms, square: 64, splice: 256 fft = 162K, ave time = 0.0498 ms, square: 64, splice: 512 fft = 162K, ave time = 0.0498 ms, square: 64, splice: 1024 fft = 162K, min time = 0.0495 ms, square: 64, splice: 32 fft = 200K, ave time = 0.0521 ms, square: 32, splice: 128 fft = 200K, ave time = 0.0519 ms, square: 64, splice: 128 fft = 200K, ave time = 0.0517 ms, square: 128, splice: 128 fft = 200K, ave time = 0.0517 ms, square: 256, splice: 128 fft = 200K, ave time = 0.0522 ms, square: 512, splice: 128 fft = 200K, ave time = 0.0537 ms, square: 1024, splice: 128 fft = 200K, ave time = 0.0514 ms, square: 128, splice: 32 fft = 200K, ave time = 0.0515 ms, square: 128, splice: 64 fft = 200K, ave time = 0.0517 ms, square: 128, splice: 128 fft = 200K, ave time = 0.0518 ms, square: 128, splice: 256 fft = 200K, ave time = 0.0519 ms, square: 128, splice: 512 fft = 200K, ave time = 0.0519 ms, square: 128, splice: 1024 fft = 200K, min time = 0.0514 ms, square: 128, splice: 32 fft = 216K, ave time = 0.0537 ms, square: 32, splice: 128 fft = 216K, ave time = 0.0537 ms, square: 64, splice: 128 fft = 216K, ave time = 0.0534 ms, square: 128, splice: 128 fft = 216K, ave time = 0.0536 ms, square: 256, splice: 128 fft = 216K, ave time = 0.0536 ms, square: 512, splice: 128 fft = 216K, ave time = 0.0551 ms, square: 1024, splice: 128 fft = 216K, ave time = 0.0532 ms, square: 128, splice: 32 fft = 216K, ave time = 0.0533 ms, square: 128, splice: 64 fft = 216K, ave time = 0.0535 ms, square: 128, splice: 128 fft = 216K, ave time = 0.0537 ms, square: 128, splice: 256 fft = 216K, ave time = 0.0537 ms, square: 128, splice: 512 fft = 216K, ave time = 0.0537 ms, square: 128, splice: 1024 fft = 216K, min time = 0.0532 ms, square: 128, splice: 32 fft = 250K, ave time = 0.0547 ms, square: 32, splice: 128 fft = 250K, ave time = 0.0545 ms, square: 64, splice: 128 fft = 250K, ave time = 0.0546 ms, square: 128, splice: 128 fft = 250K, ave time = 0.0547 ms, square: 256, splice: 128 fft = 250K, ave time = 0.0551 ms, square: 512, splice: 128 fft = 250K, ave time = 0.0543 ms, square: 64, splice: 32 fft = 250K, ave time = 0.0544 ms, square: 64, splice: 64 fft = 250K, ave time = 0.0545 ms, square: 64, splice: 128 fft = 250K, ave time = 0.0548 ms, square: 64, splice: 256 fft = 250K, ave time = 0.0548 ms, square: 64, splice: 512 fft = 250K, ave time = 0.0548 ms, square: 64, splice: 1024 fft = 250K, min time = 0.0543 ms, square: 64, splice: 32 fft = 256K, ave time = 0.0554 ms, square: 32, splice: 128 fft = 256K, ave time = 0.0554 ms, square: 64, splice: 128 fft = 256K, ave time = 0.0553 ms, square: 128, splice: 128 fft = 256K, ave time = 0.0554 ms, square: 256, splice: 128 fft = 256K, ave time = 0.0558 ms, square: 512, splice: 128 fft = 256K, ave time = 0.0563 ms, square: 1024, splice: 128 fft = 256K, ave time = 0.0550 ms, square: 128, splice: 32 fft = 256K, ave time = 0.0551 ms, square: 128, splice: 64 fft = 256K, ave time = 0.0553 ms, square: 128, splice: 128 fft = 256K, ave time = 0.0555 ms, square: 128, splice: 256 fft = 256K, ave time = 0.0555 ms, square: 128, splice: 512 fft = 256K, ave time = 0.0555 ms, square: 128, splice: 1024 fft = 256K, min time = 0.0550 ms, square: 128, splice: 32 fft = 288K, ave time = 0.0593 ms, square: 32, splice: 128 fft = 288K, ave time = 0.0592 ms, square: 64, splice: 128 fft = 288K, ave time = 0.0592 ms, square: 128, splice: 128 fft = 288K, ave time = 0.0592 ms, square: 256, splice: 128 fft = 288K, ave time = 0.0592 ms, square: 512, splice: 128 fft = 288K, ave time = 0.0598 ms, square: 1024, splice: 128 fft = 288K, ave time = 0.0589 ms, square: 64, splice: 32 fft = 288K, ave time = 0.0590 ms, square: 64, splice: 64 fft = 288K, ave time = 0.0592 ms, square: 64, splice: 128 fft = 288K, ave time = 0.0595 ms, square: 64, splice: 256 fft = 288K, ave time = 0.0596 ms, square: 64, splice: 512 fft = 288K, ave time = 0.0596 ms, square: 64, splice: 1024 fft = 288K, min time = 0.0589 ms, square: 64, splice: 32 fft = 320K, ave time = 0.0744 ms, square: 32, splice: 128 fft = 320K, ave time = 0.0743 ms, square: 64, splice: 128 fft = 320K, ave time = 0.0742 ms, square: 128, splice: 128 fft = 320K, ave time = 0.0741 ms, square: 256, splice: 128 fft = 320K, ave time = 0.0741 ms, square: 512, splice: 128 fft = 320K, ave time = 0.0745 ms, square: 1024, splice: 128 fft = 320K, ave time = 0.0739 ms, square: 256, splice: 32 fft = 320K, ave time = 0.0740 ms, square: 256, splice: 64 fft = 320K, ave time = 0.0741 ms, square: 256, splice: 128 fft = 320K, ave time = 0.0745 ms, square: 256, splice: 256 fft = 320K, ave time = 0.0746 ms, square: 256, splice: 512 fft = 320K, ave time = 0.0747 ms, square: 256, splice: 1024 fft = 320K, min time = 0.0739 ms, square: 256, splice: 32 fft = 343K, ave time = 0.0698 ms, square: 32, splice: 128 fft = 343K, ave time = 0.0697 ms, square: 64, splice: 128 fft = 343K, ave time = 0.0695 ms, square: 128, splice: 128 fft = 343K, ave time = 0.0696 ms, square: 256, splice: 128 fft = 343K, ave time = 0.0693 ms, square: 128, splice: 32 fft = 343K, ave time = 0.0694 ms, square: 128, splice: 64 fft = 343K, ave time = 0.0695 ms, square: 128, splice: 128 fft = 343K, ave time = 0.0698 ms, square: 128, splice: 256 fft = 343K, ave time = 0.0701 ms, square: 128, splice: 512 fft = 343K, ave time = 0.0701 ms, square: 128, splice: 1024 fft = 343K, min time = 0.0693 ms, square: 128, splice: 32 fft = 512K, ave time = 0.0770 ms, square: 32, splice: 128 fft = 512K, ave time = 0.0770 ms, square: 64, splice: 128 fft = 512K, ave time = 0.0771 ms, square: 128, splice: 128 fft = 512K, ave time = 0.0771 ms, square: 256, splice: 128 fft = 512K, ave time = 0.0776 ms, square: 512, splice: 128 fft = 512K, ave time = 0.0786 ms, square: 1024, splice: 128 fft = 512K, ave time = 0.0768 ms, square: 32, splice: 32 fft = 512K, ave time = 0.0769 ms, square: 32, splice: 64 fft = 512K, ave time = 0.0770 ms, square: 32, splice: 128 fft = 512K, ave time = 0.0773 ms, square: 32, splice: 256 fft = 512K, ave time = 0.0780 ms, square: 32, splice: 512 fft = 512K, ave time = 0.0780 ms, square: 32, splice: 1024 fft = 512K, min time = 0.0768 ms, square: 32, splice: 32 fft = 625K, ave time = 0.0937 ms, square: 32, splice: 128 fft = 625K, ave time = 0.0938 ms, square: 64, splice: 128 fft = 625K, ave time = 0.0943 ms, square: 128, splice: 128 fft = 625K, ave time = 0.0943 ms, square: 256, splice: 128 fft = 625K, ave time = 0.0935 ms, square: 32, splice: 32 fft = 625K, ave time = 0.0936 ms, square: 32, splice: 64 fft = 625K, ave time = 0.0937 ms, square: 32, splice: 128 fft = 625K, ave time = 0.0941 ms, square: 32, splice: 256 fft = 625K, ave time = 0.0947 ms, square: 32, splice: 512 fft = 625K, ave time = 0.0950 ms, square: 32, splice: 1024 fft = 625K, min time = 0.0935 ms, square: 32, splice: 32 fft = 640K, ave time = 0.1129 ms, square: 32, splice: 128 fft = 640K, ave time = 0.1127 ms, square: 64, splice: 128 fft = 640K, ave time = 0.1126 ms, square: 128, splice: 128 fft = 640K, ave time = 0.1125 ms, square: 256, splice: 128 fft = 640K, ave time = 0.1123 ms, square: 512, splice: 128 fft = 640K, ave time = 0.1132 ms, square: 1024, splice: 128 fft = 640K, ave time = 0.1121 ms, square: 512, splice: 32 fft = 640K, ave time = 0.1122 ms, square: 512, splice: 64 fft = 640K, ave time = 0.1123 ms, square: 512, splice: 128 fft = 640K, ave time = 0.1127 ms, square: 512, splice: 256 fft = 640K, ave time = 0.1134 ms, square: 512, splice: 512 fft = 640K, ave time = 0.1137 ms, square: 512, splice: 1024 fft = 640K, min time = 0.1121 ms, square: 512, splice: 32 fft = 686K, ave time = 0.1164 ms, square: 32, splice: 128 fft = 686K, ave time = 0.1159 ms, square: 64, splice: 128 fft = 686K, ave time = 0.1167 ms, square: 128, splice: 128 fft = 686K, ave time = 0.1165 ms, square: 256, splice: 128 fft = 686K, ave time = 0.1168 ms, square: 512, splice: 128 fft = 686K, ave time = 0.1158 ms, square: 64, splice: 32 fft = 686K, ave time = 0.1159 ms, square: 64, splice: 64 fft = 686K, ave time = 0.1159 ms, square: 64, splice: 128 fft = 686K, ave time = 0.1162 ms, square: 64, splice: 256 fft = 686K, ave time = 0.1169 ms, square: 64, splice: 512 fft = 686K, ave time = 0.1174 ms, square: 64, splice: 1024 fft = 686K, min time = 0.1158 ms, square: 64, splice: 32 fft = 729K, ave time = 0.1303 ms, square: 32, splice: 128 fft = 729K, ave time = 0.1316 ms, square: 64, splice: 128 fft = 729K, ave time = 0.1324 ms, square: 128, splice: 128 fft = 729K, ave time = 0.1324 ms, square: 256, splice: 128 fft = 729K, ave time = 0.1301 ms, square: 32, splice: 32 fft = 729K, ave time = 0.1302 ms, square: 32, splice: 64 fft = 729K, ave time = 0.1303 ms, square: 32, splice: 128 fft = 729K, ave time = 0.1306 ms, square: 32, splice: 256 fft = 729K, ave time = 0.1313 ms, square: 32, splice: 512 fft = 729K, ave time = 0.1319 ms, square: 32, splice: 1024 fft = 729K, min time = 0.1301 ms, square: 32, splice: 32 fft = 800K, ave time = 0.1377 ms, square: 32, splice: 128 fft = 800K, ave time = 0.1394 ms, square: 64, splice: 128 fft = 800K, ave time = 0.1394 ms, square: 128, splice: 128 fft = 800K, ave time = 0.1392 ms, square: 256, splice: 128 fft = 800K, ave time = 0.1393 ms, square: 512, splice: 128 fft = 800K, ave time = 0.1411 ms, square: 1024, splice: 128 fft = 800K, ave time = 0.1374 ms, square: 32, splice: 32 fft = 800K, ave time = 0.1376 ms, square: 32, splice: 64 fft = 800K, ave time = 0.1377 ms, square: 32, splice: 128 fft = 800K, ave time = 0.1380 ms, square: 32, splice: 256 fft = 800K, ave time = 0.1387 ms, square: 32, splice: 512 fft = 800K, ave time = 0.1396 ms, square: 32, splice: 1024 fft = 800K, min time = 0.1374 ms, square: 32, splice: 32 fft = 864K, ave time = 0.1584 ms, square: 32, splice: 128 fft = 864K, ave time = 0.1604 ms, square: 64, splice: 128 fft = 864K, ave time = 0.1605 ms, square: 128, splice: 128 fft = 864K, ave time = 0.1606 ms, square: 256, splice: 128 fft = 864K, ave time = 0.1609 ms, square: 512, splice: 128 fft = 864K, ave time = 0.1629 ms, square: 1024, splice: 128 fft = 864K, ave time = 0.1582 ms, square: 32, splice: 32 fft = 864K, ave time = 0.1583 ms, square: 32, splice: 64 fft = 864K, ave time = 0.1584 ms, square: 32, splice: 128 fft = 864K, ave time = 0.1587 ms, square: 32, splice: 256 fft = 864K, ave time = 0.1594 ms, square: 32, splice: 512 fft = 864K, ave time = 0.1605 ms, square: 32, splice: 1024 fft = 864K, min time = 0.1582 ms, square: 32, splice: 32 fft = 896K, ave time = 0.1748 ms, square: 32, splice: 128 fft = 896K, ave time = 0.1771 ms, square: 64, splice: 128 fft = 896K, ave time = 0.1772 ms, square: 128, splice: 128 fft = 896K, ave time = 0.1775 ms, square: 256, splice: 128 fft = 896K, ave time = 0.1787 ms, square: 512, splice: 128 fft = 896K, ave time = 0.1799 ms, square: 1024, splice: 128 fft = 896K, ave time = 0.1746 ms, square: 32, splice: 32 fft = 896K, ave time = 0.1747 ms, square: 32, splice: 64 fft = 896K, ave time = 0.1748 ms, square: 32, splice: 128 fft = 896K, ave time = 0.1752 ms, square: 32, splice: 256 fft = 896K, ave time = 0.1758 ms, square: 32, splice: 512 fft = 896K, ave time = 0.1771 ms, square: 32, splice: 1024 fft = 896K, min time = 0.1746 ms, square: 32, splice: 32 fft = 1024K, ave time = 0.1916 ms, square: 32, splice: 128 fft = 1024K, ave time = 0.1937 ms, square: 64, splice: 128 fft = 1024K, ave time = 0.1944 ms, square: 128, splice: 128 fft = 1024K, ave time = 0.1943 ms, square: 256, splice: 128 fft = 1024K, ave time = 0.1954 ms, square: 512, splice: 128 fft = 1024K, ave time = 0.1974 ms, square: 1024, splice: 128 fft = 1024K, ave time = 0.1913 ms, square: 32, splice: 32 fft = 1024K, ave time = 0.1915 ms, square: 32, splice: 64 fft = 1024K, ave time = 0.1915 ms, square: 32, splice: 128 fft = 1024K, ave time = 0.1918 ms, square: 32, splice: 256 fft = 1024K, ave time = 0.1925 ms, square: 32, splice: 512 fft = 1024K, ave time = 0.1941 ms, square: 32, splice: 1024 fft = 1024K, min time = 0.1913 ms, square: 32, splice: 32 fft = 1152K, ave time = 0.2282 ms, square: 32, splice: 128 fft = 1152K, ave time = 0.2310 ms, square: 64, splice: 128 fft = 1152K, ave time = 0.2312 ms, square: 128, splice: 128 fft = 1152K, ave time = 0.2311 ms, square: 256, splice: 128 fft = 1152K, ave time = 0.2325 ms, square: 512, splice: 128 fft = 1152K, ave time = 0.2344 ms, square: 1024, splice: 128 fft = 1152K, ave time = 0.2280 ms, square: 32, splice: 32 fft = 1152K, ave time = 0.2282 ms, square: 32, splice: 64 fft = 1152K, ave time = 0.2281 ms, square: 32, splice: 128 fft = 1152K, ave time = 0.2285 ms, square: 32, splice: 256 fft = 1152K, ave time = 0.2291 ms, square: 32, splice: 512 fft = 1152K, ave time = 0.2307 ms, square: 32, splice: 1024 fft = 1152K, min time = 0.2280 ms, square: 32, splice: 32 fft = 1250K, ave time = 0.2476 ms, square: 32, splice: 128 fft = 1250K, ave time = 0.2509 ms, square: 64, splice: 128 fft = 1250K, ave time = 0.2513 ms, square: 128, splice: 128 fft = 1250K, ave time = 0.2514 ms, square: 256, splice: 128 fft = 1250K, ave time = 0.2536 ms, square: 512, splice: 128 fft = 1250K, ave time = 0.2475 ms, square: 32, splice: 32 fft = 1250K, ave time = 0.2476 ms, square: 32, splice: 64 fft = 1250K, ave time = 0.2477 ms, square: 32, splice: 128 fft = 1250K, ave time = 0.2479 ms, square: 32, splice: 256 fft = 1250K, ave time = 0.2483 ms, square: 32, splice: 512 fft = 1250K, ave time = 0.2499 ms, square: 32, splice: 1024 fft = 1250K, min time = 0.2475 ms, square: 32, splice: 32 fft = 1280K, ave time = 0.2661 ms, square: 32, splice: 128 fft = 1280K, ave time = 0.2695 ms, square: 64, splice: 128 fft = 1280K, ave time = 0.2696 ms, square: 128, splice: 128 fft = 1280K, ave time = 0.2698 ms, square: 256, splice: 128 fft = 1280K, ave time = 0.2710 ms, square: 512, splice: 128 fft = 1280K, ave time = 0.2726 ms, square: 1024, splice: 128 fft = 1280K, ave time = 0.2659 ms, square: 32, splice: 32 fft = 1280K, ave time = 0.2659 ms, square: 32, splice: 64 fft = 1280K, ave time = 0.2661 ms, square: 32, splice: 128 fft = 1280K, ave time = 0.2663 ms, square: 32, splice: 256 fft = 1280K, ave time = 0.2669 ms, square: 32, splice: 512 fft = 1280K, ave time = 0.2684 ms, square: 32, splice: 1024 fft = 1280K, min time = 0.2659 ms, square: 32, splice: 32 fft = 1296K, ave time = 0.2708 ms, square: 32, splice: 128 fft = 1296K, ave time = 0.2744 ms, square: 64, splice: 128 fft = 1296K, ave time = 0.2746 ms, square: 128, splice: 128 fft = 1296K, ave time = 0.2745 ms, square: 256, splice: 128 fft = 1296K, ave time = 0.2761 ms, square: 512, splice: 128 fft = 1296K, ave time = 0.2777 ms, square: 1024, splice: 128 fft = 1296K, ave time = 0.2707 ms, square: 32, splice: 32 fft = 1296K, ave time = 0.2707 ms, square: 32, splice: 64 fft = 1296K, ave time = 0.2708 ms, square: 32, splice: 128 fft = 1296K, ave time = 0.2711 ms, square: 32, splice: 256 fft = 1296K, ave time = 0.2717 ms, square: 32, splice: 512 fft = 1296K, ave time = 0.2732 ms, square: 32, splice: 1024 fft = 1296K, min time = 0.2707 ms, square: 32, splice: 32 fft = 1372K, ave time = 0.2867 ms, square: 32, splice: 128 fft = 1372K, ave time = 0.2905 ms, square: 64, splice: 128 fft = 1372K, ave time = 0.2907 ms, square: 128, splice: 128 fft = 1372K, ave time = 0.2911 ms, square: 256, splice: 128 fft = 1372K, ave time = 0.2930 ms, square: 512, splice: 128 fft = 1372K, ave time = 0.2958 ms, square: 1024, splice: 128 fft = 1372K, ave time = 0.2864 ms, square: 32, splice: 32 fft = 1372K, ave time = 0.2865 ms, square: 32, splice: 64 fft = 1372K, ave time = 0.2866 ms, square: 32, splice: 128 fft = 1372K, ave time = 0.2869 ms, square: 32, splice: 256 fft = 1372K, ave time = 0.2874 ms, square: 32, splice: 512 fft = 1372K, ave time = 0.2890 ms, square: 32, splice: 1024 fft = 1372K, min time = 0.2864 ms, square: 32, splice: 32 fft = 1458K, ave time = 0.2992 ms, square: 32, splice: 128 fft = 1458K, ave time = 0.3035 ms, square: 64, splice: 128 fft = 1458K, ave time = 0.3035 ms, square: 128, splice: 128 fft = 1458K, ave time = 0.3040 ms, square: 256, splice: 128 fft = 1458K, ave time = 0.3054 ms, square: 512, splice: 128 fft = 1458K, ave time = 0.2992 ms, square: 32, splice: 32 fft = 1458K, ave time = 0.2992 ms, square: 32, splice: 64 fft = 1458K, ave time = 0.2991 ms, square: 32, splice: 128 fft = 1458K, ave time = 0.2995 ms, square: 32, splice: 256 fft = 1458K, ave time = 0.2999 ms, square: 32, splice: 512 fft = 1458K, ave time = 0.3013 ms, square: 32, splice: 1024 fft = 1458K, min time = 0.2991 ms, square: 32, splice: 128 fft = 1568K, ave time = 0.3074 ms, square: 32, splice: 128 fft = 1568K, ave time = 0.3118 ms, square: 64, splice: 128 fft = 1568K, ave time = 0.3120 ms, square: 128, splice: 128 fft = 1568K, ave time = 0.3121 ms, square: 256, splice: 128 fft = 1568K, ave time = 0.3139 ms, square: 512, splice: 128 fft = 1568K, ave time = 0.3164 ms, square: 1024, splice: 128 fft = 1568K, ave time = 0.3073 ms, square: 32, splice: 32 fft = 1568K, ave time = 0.3072 ms, square: 32, splice: 64 fft = 1568K, ave time = 0.3075 ms, square: 32, splice: 128 fft = 1568K, ave time = 0.3076 ms, square: 32, splice: 256 fft = 1568K, ave time = 0.3082 ms, square: 32, splice: 512 fft = 1568K, ave time = 0.3095 ms, square: 32, splice: 1024 fft = 1568K, min time = 0.3072 ms, square: 32, splice: 64 fft = 1600K, ave time = 0.3235 ms, square: 32, splice: 128 fft = 1600K, ave time = 0.3282 ms, square: 64, splice: 128 fft = 1600K, ave time = 0.3280 ms, square: 128, splice: 128 fft = 1600K, ave time = 0.3280 ms, square: 256, splice: 128 fft = 1600K, ave time = 0.3296 ms, square: 512, splice: 128 fft = 1600K, ave time = 0.3322 ms, square: 1024, splice: 128 fft = 1600K, ave time = 0.3235 ms, square: 32, splice: 32 fft = 1600K, ave time = 0.3235 ms, square: 32, splice: 64 fft = 1600K, ave time = 0.3236 ms, square: 32, splice: 128 fft = 1600K, ave time = 0.3239 ms, square: 32, splice: 256 fft = 1600K, ave time = 0.3246 ms, square: 32, splice: 512 fft = 1600K, ave time = 0.3258 ms, square: 32, splice: 1024 fft = 1600K, min time = 0.3235 ms, square: 32, splice: 32 fft = 1728K, ave time = 0.3500 ms, square: 32, splice: 128 fft = 1728K, ave time = 0.3550 ms, square: 64, splice: 128 fft = 1728K, ave time = 0.3551 ms, square: 128, splice: 128 fft = 1728K, ave time = 0.3547 ms, square: 256, splice: 128 fft = 1728K, ave time = 0.3564 ms, square: 512, splice: 128 fft = 1728K, ave time = 0.3586 ms, square: 1024, splice: 128 fft = 1728K, ave time = 0.3500 ms, square: 32, splice: 32 fft = 1728K, ave time = 0.3499 ms, square: 32, splice: 64 fft = 1728K, ave time = 0.3501 ms, square: 32, splice: 128 fft = 1728K, ave time = 0.3504 ms, square: 32, splice: 256 fft = 1728K, ave time = 0.3509 ms, square: 32, splice: 512 fft = 1728K, ave time = 0.3524 ms, square: 32, splice: 1024 fft = 1728K, min time = 0.3499 ms, square: 32, splice: 64 fft = 2048K, ave time = 0.3728 ms, square: 32, splice: 128 fft = 2048K, ave time = 0.3789 ms, square: 64, splice: 128 fft = 2048K, ave time = 0.3790 ms, square: 128, splice: 128 fft = 2048K, ave time = 0.3791 ms, square: 256, splice: 128 fft = 2048K, ave time = 0.3812 ms, square: 512, splice: 128 fft = 2048K, ave time = 0.3837 ms, square: 1024, splice: 128 fft = 2048K, ave time = 0.3727 ms, square: 32, splice: 32 fft = 2048K, ave time = 0.3726 ms, square: 32, splice: 64 fft = 2048K, ave time = 0.3728 ms, square: 32, splice: 128 fft = 2048K, ave time = 0.3731 ms, square: 32, splice: 256 fft = 2048K, ave time = 0.3737 ms, square: 32, splice: 512 fft = 2048K, ave time = 0.3751 ms, square: 32, splice: 1024 fft = 2048K, min time = 0.3726 ms, square: 32, splice: 64 fft = 2187K, ave time = 0.4777 ms, square: 32, splice: 128 fft = 2187K, ave time = 0.4842 ms, square: 64, splice: 128 fft = 2187K, ave time = 0.4843 ms, square: 128, splice: 128 fft = 2187K, ave time = 0.4842 ms, square: 256, splice: 128 fft = 2187K, ave time = 0.4774 ms, square: 32, splice: 32 fft = 2187K, ave time = 0.4776 ms, square: 32, splice: 64 fft = 2187K, ave time = 0.4776 ms, square: 32, splice: 128 fft = 2187K, ave time = 0.4778 ms, square: 32, splice: 256 fft = 2187K, ave time = 0.4785 ms, square: 32, splice: 512 fft = 2187K, ave time = 0.4798 ms, square: 32, splice: 1024 fft = 2187K, min time = 0.4774 ms, square: 32, splice: 32 fft = 2304K, ave time = 0.4812 ms, square: 32, splice: 128 fft = 2304K, ave time = 0.4881 ms, square: 64, splice: 128 fft = 2304K, ave time = 0.4881 ms, square: 128, splice: 128 fft = 2304K, ave time = 0.4883 ms, square: 256, splice: 128 fft = 2304K, ave time = 0.4899 ms, square: 512, splice: 128 fft = 2304K, ave time = 0.4926 ms, square: 1024, splice: 128 fft = 2304K, ave time = 0.4811 ms, square: 32, splice: 32 fft = 2304K, ave time = 0.4810 ms, square: 32, splice: 64 fft = 2304K, ave time = 0.4811 ms, square: 32, splice: 128 fft = 2304K, ave time = 0.4814 ms, square: 32, splice: 256 fft = 2304K, ave time = 0.4820 ms, square: 32, splice: 512 fft = 2304K, ave time = 0.4833 ms, square: 32, splice: 1024 fft = 2304K, min time = 0.4810 ms, square: 32, splice: 64 fft = 2401K, ave time = 0.4970 ms, square: 32, splice: 128 fft = 2401K, ave time = 0.5038 ms, square: 64, splice: 128 fft = 2401K, ave time = 0.5039 ms, square: 128, splice: 128 fft = 2401K, ave time = 0.5037 ms, square: 256, splice: 128 fft = 2401K, ave time = 0.4969 ms, square: 32, splice: 32 fft = 2401K, ave time = 0.4968 ms, square: 32, splice: 64 fft = 2401K, ave time = 0.4970 ms, square: 32, splice: 128 fft = 2401K, ave time = 0.4972 ms, square: 32, splice: 256 fft = 2401K, ave time = 0.4978 ms, square: 32, splice: 512 fft = 2401K, ave time = 0.4991 ms, square: 32, splice: 1024 fft = 2401K, min time = 0.4968 ms, square: 32, splice: 64 fft = 2592K, ave time = 0.4988 ms, square: 32, splice: 128 fft = 2592K, ave time = 0.5068 ms, square: 64, splice: 128 fft = 2592K, ave time = 0.5069 ms, square: 128, splice: 128 fft = 2592K, ave time = 0.5065 ms, square: 256, splice: 128 fft = 2592K, ave time = 0.5091 ms, square: 512, splice: 128 fft = 2592K, ave time = 0.5118 ms, square: 1024, splice: 128 fft = 2592K, ave time = 0.4987 ms, square: 32, splice: 32 fft = 2592K, ave time = 0.4988 ms, square: 32, splice: 64 fft = 2592K, ave time = 0.4989 ms, square: 32, splice: 128 fft = 2592K, ave time = 0.4990 ms, square: 32, splice: 256 fft = 2592K, ave time = 0.4996 ms, square: 32, splice: 512 fft = 2592K, ave time = 0.5009 ms, square: 32, splice: 1024 fft = 2592K, min time = 0.4987 ms, square: 32, splice: 32 fft = 2744K, ave time = 0.5291 ms, square: 32, splice: 128 fft = 2744K, ave time = 0.5382 ms, square: 64, splice: 128 fft = 2744K, ave time = 0.5378 ms, square: 128, splice: 128 fft = 2744K, ave time = 0.5381 ms, square: 256, splice: 128 fft = 2744K, ave time = 0.5409 ms, square: 512, splice: 128 fft = 2744K, ave time = 0.5444 ms, square: 1024, splice: 128 fft = 2744K, ave time = 0.5292 ms, square: 32, splice: 32 fft = 2744K, ave time = 0.5291 ms, square: 32, splice: 64 fft = 2744K, ave time = 0.5292 ms, square: 32, splice: 128 fft = 2744K, ave time = 0.5294 ms, square: 32, splice: 256 fft = 2744K, ave time = 0.5300 ms, square: 32, splice: 512 fft = 2744K, ave time = 0.5312 ms, square: 32, splice: 1024 fft = 2744K, min time = 0.5291 ms, square: 32, splice: 64 fft = 2916K, ave time = 0.5945 ms, square: 32, splice: 128 fft = 2916K, ave time = 0.6034 ms, square: 64, splice: 128 fft = 2916K, ave time = 0.6012 ms, square: 128, splice: 128 fft = 2916K, ave time = 0.6031 ms, square: 256, splice: 128 fft = 2916K, ave time = 0.6038 ms, square: 512, splice: 128 fft = 2916K, ave time = 0.6081 ms, square: 1024, splice: 128 fft = 2916K, ave time = 0.5943 ms, square: 32, splice: 32 fft = 2916K, ave time = 0.5942 ms, square: 32, splice: 64 fft = 2916K, ave time = 0.5945 ms, square: 32, splice: 128 fft = 2916K, ave time = 0.5943 ms, square: 32, splice: 256 fft = 2916K, ave time = 0.5948 ms, square: 32, splice: 512 fft = 2916K, ave time = 0.5957 ms, square: 32, splice: 1024 fft = 2916K, min time = 0.5942 ms, square: 32, splice: 64 fft = 3136K, ave time = 0.6067 ms, square: 32, splice: 128 fft = 3136K, ave time = 0.6160 ms, square: 64, splice: 128 fft = 3136K, ave time = 0.6161 ms, square: 128, splice: 128 fft = 3136K, ave time = 0.6162 ms, square: 256, splice: 128 fft = 3136K, ave time = 0.6190 ms, square: 512, splice: 128 fft = 3136K, ave time = 0.6216 ms, square: 1024, splice: 128 fft = 3136K, ave time = 0.6066 ms, square: 32, splice: 32 fft = 3136K, ave time = 0.6066 ms, square: 32, splice: 64 fft = 3136K, ave time = 0.6066 ms, square: 32, splice: 128 fft = 3136K, ave time = 0.6070 ms, square: 32, splice: 256 fft = 3136K, ave time = 0.6075 ms, square: 32, splice: 512 fft = 3136K, ave time = 0.6088 ms, square: 32, splice: 1024 fft = 3136K, min time = 0.6066 ms, square: 32, splice: 64 fft = 3200K, ave time = 0.6291 ms, square: 32, splice: 128 fft = 3200K, ave time = 0.6388 ms, square: 64, splice: 128 fft = 3200K, ave time = 0.6387 ms, square: 128, splice: 128 fft = 3200K, ave time = 0.6387 ms, square: 256, splice: 128 fft = 3200K, ave time = 0.6412 ms, square: 512, splice: 128 fft = 3200K, ave time = 0.6440 ms, square: 1024, splice: 128 fft = 3200K, ave time = 0.6291 ms, square: 32, splice: 32 fft = 3200K, ave time = 0.6290 ms, square: 32, splice: 64 fft = 3200K, ave time = 0.6291 ms, square: 32, splice: 128 fft = 3200K, ave time = 0.6292 ms, square: 32, splice: 256 fft = 3200K, ave time = 0.6298 ms, square: 32, splice: 512 fft = 3200K, ave time = 0.6313 ms, square: 32, splice: 1024 fft = 3200K, min time = 0.6290 ms, square: 32, splice: 64 fft = 3456K, ave time = 0.6734 ms, square: 32, splice: 128 fft = 3456K, ave time = 0.6842 ms, square: 64, splice: 128 fft = 3456K, ave time = 0.6840 ms, square: 128, splice: 128 fft = 3456K, ave time = 0.6841 ms, square: 256, splice: 128 fft = 3456K, ave time = 0.6865 ms, square: 512, splice: 128 fft = 3456K, ave time = 0.6899 ms, square: 1024, splice: 128 fft = 3456K, ave time = 0.6734 ms, square: 32, splice: 32 fft = 3456K, ave time = 0.6735 ms, square: 32, splice: 64 fft = 3456K, ave time = 0.6735 ms, square: 32, splice: 128 fft = 3456K, ave time = 0.6736 ms, square: 32, splice: 256 fft = 3456K, ave time = 0.6743 ms, square: 32, splice: 512 fft = 3456K, ave time = 0.6756 ms, square: 32, splice: 1024 fft = 3456K, min time = 0.6734 ms, square: 32, splice: 32 fft = 3584K, ave time = 0.7278 ms, square: 32, splice: 128 fft = 3584K, ave time = 0.7389 ms, square: 64, splice: 128 fft = 3584K, ave time = 0.7390 ms, square: 128, splice: 128 fft = 3584K, ave time = 0.7389 ms, square: 256, splice: 128 fft = 3584K, ave time = 0.7414 ms, square: 512, splice: 128 fft = 3584K, ave time = 0.7448 ms, square: 1024, splice: 128 fft = 3584K, ave time = 0.7278 ms, square: 32, splice: 32 fft = 3584K, ave time = 0.7278 ms, square: 32, splice: 64 fft = 3584K, ave time = 0.7278 ms, square: 32, splice: 128 fft = 3584K, ave time = 0.7281 ms, square: 32, splice: 256 fft = 3584K, ave time = 0.7286 ms, square: 32, splice: 512 fft = 3584K, ave time = 0.7301 ms, square: 32, splice: 1024 fft = 3584K, min time = 0.7278 ms, square: 32, splice: 32 fft = 4096K, ave time = 0.7344 ms, square: 32, splice: 128 fft = 4096K, ave time = 0.7475 ms, square: 64, splice: 128 fft = 4096K, ave time = 0.7473 ms, square: 128, splice: 128 fft = 4096K, ave time = 0.7474 ms, square: 256, splice: 128 fft = 4096K, ave time = 0.7508 ms, square: 512, splice: 128 fft = 4096K, ave time = 0.7544 ms, square: 1024, splice: 128 fft = 4096K, ave time = 0.7343 ms, square: 32, splice: 32 fft = 4096K, ave time = 0.7344 ms, square: 32, splice: 64 fft = 4096K, ave time = 0.7344 ms, square: 32, splice: 128 fft = 4096K, ave time = 0.7346 ms, square: 32, splice: 256 fft = 4096K, ave time = 0.7352 ms, square: 32, splice: 512 fft = 4096K, ave time = 0.7365 ms, square: 32, splice: 1024 fft = 4096K, min time = 0.7343 ms, square: 32, splice: 32 fft = 4608K, ave time = 0.9121 ms, square: 32, splice: 128 fft = 4608K, ave time = 0.9272 ms, square: 64, splice: 128 fft = 4608K, ave time = 0.9269 ms, square: 128, splice: 128 fft = 4608K, ave time = 0.9266 ms, square: 256, splice: 128 fft = 4608K, ave time = 0.9299 ms, square: 512, splice: 128 fft = 4608K, ave time = 0.9336 ms, square: 1024, splice: 128 fft = 4608K, ave time = 0.9122 ms, square: 32, splice: 32 fft = 4608K, ave time = 0.9123 ms, square: 32, splice: 64 fft = 4608K, ave time = 0.9122 ms, square: 32, splice: 128 fft = 4608K, ave time = 0.9123 ms, square: 32, splice: 256 fft = 4608K, ave time = 0.9130 ms, square: 32, splice: 512 fft = 4608K, ave time = 0.9145 ms, square: 32, splice: 1024 fft = 4608K, min time = 0.9122 ms, square: 32, splice: 32 fft = 5184K, ave time = 0.9698 ms, square: 32, splice: 128 fft = 5184K, ave time = 0.9858 ms, square: 64, splice: 128 fft = 5184K, ave time = 0.9855 ms, square: 128, splice: 128 fft = 5184K, ave time = 0.9857 ms, square: 256, splice: 128 fft = 5184K, ave time = 0.9896 ms, square: 512, splice: 128 fft = 5184K, ave time = 0.9935 ms, square: 1024, splice: 128 fft = 5184K, ave time = 0.9698 ms, square: 32, splice: 32 fft = 5184K, ave time = 0.9698 ms, square: 32, splice: 64 fft = 5184K, ave time = 0.9698 ms, square: 32, splice: 128 fft = 5184K, ave time = 0.9700 ms, square: 32, splice: 256 fft = 5184K, ave time = 0.9706 ms, square: 32, splice: 512 fft = 5184K, ave time = 0.9721 ms, square: 32, splice: 1024 fft = 5184K, min time = 0.9698 ms, square: 32, splice: 128 fft = 5488K, ave time = 1.0147 ms, square: 32, splice: 128 fft = 5488K, ave time = 1.0324 ms, square: 64, splice: 128 fft = 5488K, ave time = 1.0321 ms, square: 128, splice: 128 fft = 5488K, ave time = 1.0321 ms, square: 256, splice: 128 fft = 5488K, ave time = 1.0363 ms, square: 512, splice: 128 fft = 5488K, ave time = 1.0402 ms, square: 1024, splice: 128 fft = 5488K, ave time = 1.0145 ms, square: 32, splice: 32 fft = 5488K, ave time = 1.0146 ms, square: 32, splice: 64 fft = 5488K, ave time = 1.0146 ms, square: 32, splice: 128 fft = 5488K, ave time = 1.0150 ms, square: 32, splice: 256 fft = 5488K, ave time = 1.0156 ms, square: 32, splice: 512 fft = 5488K, ave time = 1.0170 ms, square: 32, splice: 1024 fft = 5488K, min time = 1.0145 ms, square: 32, splice: 32 fft = 6272K, ave time = 1.1746 ms, square: 32, splice: 128 fft = 6272K, ave time = 1.1944 ms, square: 64, splice: 128 fft = 6272K, ave time = 1.1942 ms, square: 128, splice: 128 fft = 6272K, ave time = 1.1942 ms, square: 256, splice: 128 fft = 6272K, ave time = 1.1983 ms, square: 512, splice: 128 fft = 6272K, ave time = 1.2027 ms, square: 1024, splice: 128 fft = 6272K, ave time = 1.1745 ms, square: 32, splice: 32 fft = 6272K, ave time = 1.1746 ms, square: 32, splice: 64 fft = 6272K, ave time = 1.1746 ms, square: 32, splice: 128 fft = 6272K, ave time = 1.1748 ms, square: 32, splice: 256 fft = 6272K, ave time = 1.1752 ms, square: 32, splice: 512 fft = 6272K, ave time = 1.1767 ms, square: 32, splice: 1024 fft = 6272K, min time = 1.1745 ms, square: 32, splice: 32 fft = 6400K, ave time = 1.2031 ms, square: 32, splice: 128 fft = 6400K, ave time = 1.2233 ms, square: 64, splice: 128 fft = 6400K, ave time = 1.2230 ms, square: 128, splice: 128 fft = 6400K, ave time = 1.2231 ms, square: 256, splice: 128 fft = 6400K, ave time = 1.2279 ms, square: 512, splice: 128 fft = 6400K, ave time = 1.2319 ms, square: 1024, splice: 128 fft = 6400K, ave time = 1.2028 ms, square: 32, splice: 32 fft = 6400K, ave time = 1.2028 ms, square: 32, splice: 64 fft = 6400K, ave time = 1.2030 ms, square: 32, splice: 128 fft = 6400K, ave time = 1.2032 ms, square: 32, splice: 256 fft = 6400K, ave time = 1.2037 ms, square: 32, splice: 512 fft = 6400K, ave time = 1.2050 ms, square: 32, splice: 1024 fft = 6400K, min time = 1.2028 ms, square: 32, splice: 64 fft = 6561K, ave time = 1.2898 ms, square: 32, splice: 128 fft = 6561K, ave time = 1.3100 ms, square: 64, splice: 128 fft = 6561K, ave time = 1.3097 ms, square: 128, splice: 128 fft = 6561K, ave time = 1.3099 ms, square: 256, splice: 128 fft = 6561K, ave time = 1.2895 ms, square: 32, splice: 32 fft = 6561K, ave time = 1.2897 ms, square: 32, splice: 64 fft = 6561K, ave time = 1.2899 ms, square: 32, splice: 128 fft = 6561K, ave time = 1.2899 ms, square: 32, splice: 256 fft = 6561K, ave time = 1.2906 ms, square: 32, splice: 512 fft = 6561K, ave time = 1.2921 ms, square: 32, splice: 1024 fft = 6561K, min time = 1.2895 ms, square: 32, splice: 32 fft = 6912K, ave time = 1.2955 ms, square: 32, splice: 128 fft = 6912K, ave time = 1.3176 ms, square: 64, splice: 128 fft = 6912K, ave time = 1.3173 ms, square: 128, splice: 128 fft = 6912K, ave time = 1.3169 ms, square: 256, splice: 128 fft = 6912K, ave time = 1.3219 ms, square: 512, splice: 128 fft = 6912K, ave time = 1.3257 ms, square: 1024, splice: 128 fft = 6912K, ave time = 1.2952 ms, square: 32, splice: 32 fft = 6912K, ave time = 1.2953 ms, square: 32, splice: 64 fft = 6912K, ave time = 1.2954 ms, square: 32, splice: 128 fft = 6912K, ave time = 1.2955 ms, square: 32, splice: 256 fft = 6912K, ave time = 1.2959 ms, square: 32, splice: 512 fft = 6912K, ave time = 1.2975 ms, square: 32, splice: 1024 fft = 6912K, min time = 1.2952 ms, square: 32, splice: 32 fft = 8192K, ave time = 1.4682 ms, square: 32, splice: 128 fft = 8192K, ave time = 1.4947 ms, square: 64, splice: 128 fft = 8192K, ave time = 1.4944 ms, square: 128, splice: 128 fft = 8192K, ave time = 1.4946 ms, square: 256, splice: 128 fft = 8192K, ave time = 1.5001 ms, square: 512, splice: 128 fft = 8192K, ave time = 1.5048 ms, square: 1024, splice: 128 fft = 8192K, ave time = 1.4681 ms, square: 32, splice: 32 fft = 8192K, ave time = 1.4682 ms, square: 32, splice: 64 fft = 8192K, ave time = 1.4682 ms, square: 32, splice: 128 fft = 8192K, ave time = 1.4683 ms, square: 32, splice: 256 fft = 8192K, ave time = 1.4688 ms, square: 32, splice: 512 fft = 8192K, ave time = 1.4703 ms, square: 32, splice: 1024 fft = 8192K, min time = 1.4681 ms, square: 32, splice: 32 fft = 8575K, ave time = 1.6804 ms, square: 32, splice: 128 fft = 8575K, ave time = 1.7075 ms, square: 64, splice: 128 fft = 8575K, ave time = 1.7071 ms, square: 128, splice: 128 fft = 8575K, ave time = 1.7073 ms, square: 256, splice: 128 fft = 8575K, ave time = 1.6803 ms, square: 32, splice: 32 fft = 8575K, ave time = 1.6806 ms, square: 32, splice: 64 fft = 8575K, ave time = 1.6804 ms, square: 32, splice: 128 fft = 8575K, ave time = 1.6803 ms, square: 32, splice: 256 fft = 8575K, ave time = 1.6811 ms, square: 32, splice: 512 fft = 8575K, ave time = 1.6826 ms, square: 32, splice: 1024 fft = 8575K, min time = 1.6803 ms, square: 32, splice: 32 fft = 9261K, ave time = 1.8112 ms, square: 32, splice: 128 fft = 9261K, ave time = 1.8406 ms, square: 64, splice: 128 fft = 9261K, ave time = 1.8403 ms, square: 128, splice: 128 fft = 9261K, ave time = 1.8402 ms, square: 256, splice: 128 fft = 9261K, ave time = 1.8114 ms, square: 32, splice: 32 fft = 9261K, ave time = 1.8111 ms, square: 32, splice: 64 fft = 9261K, ave time = 1.8111 ms, square: 32, splice: 128 fft = 9261K, ave time = 1.8113 ms, square: 32, splice: 256 fft = 9261K, ave time = 1.8118 ms, square: 32, splice: 512 fft = 9261K, ave time = 1.8133 ms, square: 32, splice: 1024 fft = 9261K, min time = 1.8111 ms, square: 32, splice: 128 fft = 10368K, ave time = 1.9001 ms, square: 32, splice: 128 fft = 10368K, ave time = 1.9330 ms, square: 64, splice: 128 fft = 10368K, ave time = 1.9325 ms, square: 128, splice: 128 fft = 10368K, ave time = 1.9324 ms, square: 256, splice: 128 fft = 10368K, ave time = 1.9398 ms, square: 512, splice: 128 fft = 10368K, ave time = 1.9447 ms, square: 1024, splice: 128 fft = 10368K, ave time = 1.9000 ms, square: 32, splice: 32 fft = 10368K, ave time = 1.9000 ms, square: 32, splice: 64 fft = 10368K, ave time = 1.9002 ms, square: 32, splice: 128 fft = 10368K, ave time = 1.9003 ms, square: 32, splice: 256 fft = 10368K, ave time = 1.9006 ms, square: 32, splice: 512 fft = 10368K, ave time = 1.9020 ms, square: 32, splice: 1024 fft = 10368K, min time = 1.9000 ms, square: 32, splice: 32 fft = 10976K, ave time = 1.9665 ms, square: 32, splice: 128 fft = 10976K, ave time = 2.0025 ms, square: 64, splice: 128 fft = 10976K, ave time = 2.0021 ms, square: 128, splice: 128 fft = 10976K, ave time = 2.0017 ms, square: 256, splice: 128 fft = 10976K, ave time = 2.0090 ms, square: 512, splice: 128 fft = 10976K, ave time = 2.0148 ms, square: 1024, splice: 128 fft = 10976K, ave time = 1.9662 ms, square: 32, splice: 32 fft = 10976K, ave time = 1.9663 ms, square: 32, splice: 64 fft = 10976K, ave time = 1.9663 ms, square: 32, splice: 128 fft = 10976K, ave time = 1.9668 ms, square: 32, splice: 256 fft = 10976K, ave time = 1.9670 ms, square: 32, splice: 512 fft = 10976K, ave time = 1.9682 ms, square: 32, splice: 1024 fft = 10976K, min time = 1.9662 ms, square: 32, splice: 32 fft = 11907K, ave time = 2.2665 ms, square: 32, splice: 128 fft = 11907K, ave time = 2.3051 ms, square: 64, splice: 128 fft = 11907K, ave time = 2.3043 ms, square: 128, splice: 128 fft = 11907K, ave time = 2.3042 ms, square: 256, splice: 128 fft = 11907K, ave time = 2.2662 ms, square: 32, splice: 32 fft = 11907K, ave time = 2.2662 ms, square: 32, splice: 64 fft = 11907K, ave time = 2.2665 ms, square: 32, splice: 128 fft = 11907K, ave time = 2.2663 ms, square: 32, splice: 256 fft = 11907K, ave time = 2.2665 ms, square: 32, splice: 512 fft = 11907K, ave time = 2.2679 ms, square: 32, splice: 1024 fft = 11907K, min time = 2.2662 ms, square: 32, splice: 64 fft = 12544K, ave time = 2.2913 ms, square: 32, splice: 128 fft = 12544K, ave time = 2.3318 ms, square: 64, splice: 128 fft = 12544K, ave time = 2.3309 ms, square: 128, splice: 128 fft = 12544K, ave time = 2.3303 ms, square: 256, splice: 128 fft = 12544K, ave time = 2.3385 ms, square: 512, splice: 128 fft = 12544K, ave time = 2.3448 ms, square: 1024, splice: 128 fft = 12544K, ave time = 2.2913 ms, square: 32, splice: 32 fft = 12544K, ave time = 2.2913 ms, square: 32, splice: 64 fft = 12544K, ave time = 2.2912 ms, square: 32, splice: 128 fft = 12544K, ave time = 2.2913 ms, square: 32, splice: 256 fft = 12544K, ave time = 2.2917 ms, square: 32, splice: 512 fft = 12544K, ave time = 2.2928 ms, square: 32, splice: 1024 fft = 12544K, min time = 2.2912 ms, square: 32, splice: 128 fft = 12800K, ave time = 2.3202 ms, square: 32, splice: 128 fft = 12800K, ave time = 2.3617 ms, square: 64, splice: 128 fft = 12800K, ave time = 2.3608 ms, square: 128, splice: 128 fft = 12800K, ave time = 2.3602 ms, square: 256, splice: 128 fft = 12800K, ave time = 2.3694 ms, square: 512, splice: 128 fft = 12800K, ave time = 2.3751 ms, square: 1024, splice: 128 fft = 12800K, ave time = 2.3201 ms, square: 32, splice: 32 fft = 12800K, ave time = 2.3200 ms, square: 32, splice: 64 fft = 12800K, ave time = 2.3199 ms, square: 32, splice: 128 fft = 12800K, ave time = 2.3201 ms, square: 32, splice: 256 fft = 12800K, ave time = 2.3204 ms, square: 32, splice: 512 fft = 12800K, ave time = 2.3215 ms, square: 32, splice: 1024 fft = 12800K, min time = 2.3199 ms, square: 32, splice: 128 fft = 13824K, ave time = 2.5029 ms, square: 32, splice: 128 fft = 13824K, ave time = 2.5481 ms, square: 64, splice: 128 fft = 13824K, ave time = 2.5474 ms, square: 128, splice: 128 fft = 13824K, ave time = 2.5473 ms, square: 256, splice: 128 fft = 13824K, ave time = 2.5571 ms, square: 512, splice: 128 fft = 13824K, ave time = 2.5625 ms, square: 1024, splice: 128 fft = 13824K, ave time = 2.5027 ms, square: 32, splice: 32 fft = 13824K, ave time = 2.5027 ms, square: 32, splice: 64 fft = 13824K, ave time = 2.5028 ms, square: 32, splice: 128 fft = 13824K, ave time = 2.5028 ms, square: 32, splice: 256 fft = 13824K, ave time = 2.5026 ms, square: 32, splice: 512 fft = 13824K, ave time = 2.5040 ms, square: 32, splice: 1024 fft = 13824K, min time = 2.5026 ms, square: 32, splice: 512 fft = 15552K, ave time = 2.8866 ms, square: 32, splice: 128 fft = 15552K, ave time = 2.9367 ms, square: 64, splice: 128 fft = 15552K, ave time = 2.9358 ms, square: 128, splice: 128 fft = 15552K, ave time = 2.9355 ms, square: 256, splice: 128 fft = 15552K, ave time = 2.9454 ms, square: 512, splice: 128 fft = 15552K, ave time = 2.9521 ms, square: 1024, splice: 128 fft = 15552K, ave time = 2.8865 ms, square: 32, splice: 32 fft = 15552K, ave time = 2.8867 ms, square: 32, splice: 64 fft = 15552K, ave time = 2.8866 ms, square: 32, splice: 128 fft = 15552K, ave time = 2.8868 ms, square: 32, splice: 256 fft = 15552K, ave time = 2.8868 ms, square: 32, splice: 512 fft = 15552K, ave time = 2.8874 ms, square: 32, splice: 1024 fft = 15552K, min time = 2.8865 ms, square: 32, splice: 32 fft = 16384K, ave time = 2.9055 ms, square: 32, splice: 128 fft = 16384K, ave time = 2.9590 ms, square: 64, splice: 128 fft = 16384K, ave time = 2.9581 ms, square: 128, splice: 128 fft = 16384K, ave time = 2.9577 ms, square: 256, splice: 128 fft = 16384K, ave time = 2.9687 ms, square: 512, splice: 128 fft = 16384K, ave time = 2.9757 ms, square: 1024, splice: 128 fft = 16384K, ave time = 2.9054 ms, square: 32, splice: 32 fft = 16384K, ave time = 2.9053 ms, square: 32, splice: 64 fft = 16384K, ave time = 2.9053 ms, square: 32, splice: 128 fft = 16384K, ave time = 2.9053 ms, square: 32, splice: 256 fft = 16384K, ave time = 2.9054 ms, square: 32, splice: 512 fft = 16384K, ave time = 2.9057 ms, square: 32, splice: 1024 fft = 16384K, min time = 2.9053 ms, square: 32, splice: 128 fft = 16807K, ave time = 3.2238 ms, square: 32, splice: 128 fft = 16807K, ave time = 3.2778 ms, square: 64, splice: 128 fft = 16807K, ave time = 3.2768 ms, square: 128, splice: 128 fft = 16807K, ave time = 3.2767 ms, square: 256, splice: 128 fft = 16807K, ave time = 3.2235 ms, square: 32, splice: 32 fft = 16807K, ave time = 3.2236 ms, square: 32, splice: 64 fft = 16807K, ave time = 3.2237 ms, square: 32, splice: 128 fft = 16807K, ave time = 3.2236 ms, square: 32, splice: 256 fft = 16807K, ave time = 3.2236 ms, square: 32, splice: 512 fft = 16807K, ave time = 3.2238 ms, square: 32, splice: 1024 fft = 16807K, min time = 3.2235 ms, square: 32, splice: 32 fft = 16875K, ave time = 3.5467 ms, square: 32, splice: 128 fft = 16875K, ave time = 3.6028 ms, square: 64, splice: 128 fft = 16875K, ave time = 3.5976 ms, square: 128, splice: 128 fft = 16875K, ave time = 3.5999 ms, square: 256, splice: 128 fft = 16875K, ave time = 3.5421 ms, square: 32, splice: 32 fft = 16875K, ave time = 3.5435 ms, square: 32, splice: 64 fft = 16875K, ave time = 3.5438 ms, square: 32, splice: 128 fft = 16875K, ave time = 3.5452 ms, square: 32, splice: 256 fft = 16875K, ave time = 3.5477 ms, square: 32, splice: 512 fft = 16875K, ave time = 3.5470 ms, square: 32, splice: 1024 fft = 16875K, min time = 3.5421 ms, square: 32, splice: 32 fft = 18225K, ave time = 3.6696 ms, square: 32, splice: 128 fft = 18225K, ave time = 3.7344 ms, square: 64, splice: 128 fft = 18225K, ave time = 3.7291 ms, square: 128, splice: 128 fft = 18225K, ave time = 3.7318 ms, square: 256, splice: 128 fft = 18225K, ave time = 3.6699 ms, square: 32, splice: 32 fft = 18225K, ave time = 3.6752 ms, square: 32, splice: 64 fft = 18225K, ave time = 3.6718 ms, square: 32, splice: 128 fft = 18225K, ave time = 3.6716 ms, square: 32, splice: 256 fft = 18225K, ave time = 3.6689 ms, square: 32, splice: 512 fft = 18225K, ave time = 3.6709 ms, square: 32, splice: 1024 fft = 18225K, min time = 3.6689 ms, square: 32, splice: 512 fft = 19683K, ave time = 3.7033 ms, square: 32, splice: 128 fft = 19683K, ave time = 3.7665 ms, square: 64, splice: 128 fft = 19683K, ave time = 3.7655 ms, square: 128, splice: 128 fft = 19683K, ave time = 3.7650 ms, square: 256, splice: 128 fft = 19683K, ave time = 3.7028 ms, square: 32, splice: 32 fft = 19683K, ave time = 3.7028 ms, square: 32, splice: 64 fft = 19683K, ave time = 3.7033 ms, square: 32, splice: 128 fft = 19683K, ave time = 3.7028 ms, square: 32, splice: 256 fft = 19683K, ave time = 3.7027 ms, square: 32, splice: 512 fft = 19683K, ave time = 3.7025 ms, square: 32, splice: 1024 fft = 19683K, min time = 3.7025 ms, square: 32, splice: 1024 fft = 20736K, ave time = 3.7582 ms, square: 32, splice: 128 fft = 20736K, ave time = 3.8268 ms, square: 64, splice: 128 fft = 20736K, ave time = 3.8254 ms, square: 128, splice: 128 fft = 20736K, ave time = 3.8247 ms, square: 256, splice: 128 fft = 20736K, ave time = 3.8389 ms, square: 512, splice: 128 fft = 20736K, ave time = 3.8463 ms, square: 1024, splice: 128 fft = 20736K, ave time = 3.7575 ms, square: 32, splice: 32 fft = 20736K, ave time = 3.7577 ms, square: 32, splice: 64 fft = 20736K, ave time = 3.7581 ms, square: 32, splice: 128 fft = 20736K, ave time = 3.7573 ms, square: 32, splice: 256 fft = 20736K, ave time = 3.7576 ms, square: 32, splice: 512 fft = 20736K, ave time = 3.7570 ms, square: 32, splice: 1024 fft = 20736K, min time = 3.7570 ms, square: 32, splice: 1024 fft = 21952K, ave time = 3.8964 ms, square: 32, splice: 128 fft = 21952K, ave time = 3.9681 ms, square: 64, splice: 128 fft = 21952K, ave time = 3.9666 ms, square: 128, splice: 128 fft = 21952K, ave time = 3.9658 ms, square: 256, splice: 128 fft = 21952K, ave time = 3.9799 ms, square: 512, splice: 128 fft = 21952K, ave time = 3.9880 ms, square: 1024, splice: 128 fft = 21952K, ave time = 3.8956 ms, square: 32, splice: 32 fft = 21952K, ave time = 3.8959 ms, square: 32, splice: 64 fft = 21952K, ave time = 3.8963 ms, square: 32, splice: 128 fft = 21952K, ave time = 3.8958 ms, square: 32, splice: 256 fft = 21952K, ave time = 3.8957 ms, square: 32, splice: 512 fft = 21952K, ave time = 3.8958 ms, square: 32, splice: 1024 fft = 21952K, min time = 3.8956 ms, square: 32, splice: 32 fft = 25088K, ave time = 4.4769 ms, square: 32, splice: 128 fft = 25088K, ave time = 4.5595 ms, square: 64, splice: 128 fft = 25088K, ave time = 4.5570 ms, square: 128, splice: 128 fft = 25088K, ave time = 4.5600 ms, square: 256, splice: 128 fft = 25088K, ave time = 4.5754 ms, square: 512, splice: 128 fft = 25088K, ave time = 4.5847 ms, square: 1024, splice: 128 fft = 25088K, ave time = 4.4763 ms, square: 32, splice: 32 fft = 25088K, ave time = 4.4765 ms, square: 32, splice: 64 fft = 25088K, ave time = 4.4769 ms, square: 32, splice: 128 fft = 25088K, ave time = 4.4763 ms, square: 32, splice: 256 fft = 25088K, ave time = 4.4761 ms, square: 32, splice: 512 fft = 25088K, ave time = 4.4758 ms, square: 32, splice: 1024 fft = 25088K, min time = 4.4758 ms, square: 32, splice: 1024 fft = 27783K, ave time = 5.2900 ms, square: 32, splice: 128 fft = 27783K, ave time = 5.3804 ms, square: 64, splice: 128 fft = 27783K, ave time = 5.3789 ms, square: 128, splice: 128 fft = 27783K, ave time = 5.3783 ms, square: 256, splice: 128 fft = 27783K, ave time = 5.2894 ms, square: 32, splice: 32 fft = 27783K, ave time = 5.2894 ms, square: 32, splice: 64 fft = 27783K, ave time = 5.2896 ms, square: 32, splice: 128 fft = 27783K, ave time = 5.2893 ms, square: 32, splice: 256 fft = 27783K, ave time = 5.2894 ms, square: 32, splice: 512 fft = 27783K, ave time = 5.2891 ms, square: 32, splice: 1024 fft = 27783K, min time = 5.2891 ms, square: 32, splice: 1024 fft = 31104K, ave time = 5.7759 ms, square: 32, splice: 128 fft = 31104K, ave time = 5.8779 ms, square: 64, splice: 128 fft = 31104K, ave time = 5.8757 ms, square: 128, splice: 128 fft = 31104K, ave time = 5.8753 ms, square: 256, splice: 128 fft = 31104K, ave time = 5.8957 ms, square: 512, splice: 128 fft = 31104K, ave time = 5.9056 ms, square: 1024, splice: 128 fft = 31104K, ave time = 5.7745 ms, square: 32, splice: 32 fft = 31104K, ave time = 5.7748 ms, square: 32, splice: 64 fft = 31104K, ave time = 5.7754 ms, square: 32, splice: 128 fft = 31104K, ave time = 5.7752 ms, square: 32, splice: 256 fft = 31104K, ave time = 5.7742 ms, square: 32, splice: 512 fft = 31104K, ave time = 5.7740 ms, square: 32, splice: 1024 fft = 31104K, min time = 5.7740 ms, square: 32, splice: 1024 fft = 32768K, ave time = 5.7888 ms, square: 32, splice: 128 fft = 32768K, ave time = 5.8966 ms, square: 64, splice: 128 fft = 32768K, ave time = 5.8945 ms, square: 128, splice: 128 fft = 32768K, ave time = 5.8936 ms, square: 256, splice: 128 fft = 32768K, ave time = 5.9158 ms, square: 512, splice: 128 fft = 32768K, ave time = 5.9273 ms, square: 1024, splice: 128 fft = 32768K, ave time = 5.7879 ms, square: 32, splice: 32 fft = 32768K, ave time = 5.7882 ms, square: 32, splice: 64 fft = 32768K, ave time = 5.7887 ms, square: 32, splice: 128 fft = 32768K, ave time = 5.7885 ms, square: 32, splice: 256 fft = 32768K, ave time = 5.7878 ms, square: 32, splice: 512 fft = 32768K, ave time = 5.7878 ms, square: 32, splice: 1024 fft = 32768K, min time = 5.7878 ms, square: 32, splice: 1024 [ec2-user@ip-172-31-5-194 cudalucas]$ ubuntu@ip-172-31-21-38:/mnt-efs/z/cudalucas$ ./CUDALucas -threadbench 32768 65536 50 1 CUDALucas v2.06beta 64-bit build, compiled Aug 9 2018 @ 23:25:35 binary compiled for CUDA 9.20 CUDA runtime version 9.20 CUDA driver version 9.20 ------- DEVICE 0 ------- name Tesla V100-SXM2-16GB UUID ECC Support? Enabled Compatibility 7.0 clockRate (MHz) 1530 memClockRate (MHz) 877 totalGlobalMem 16945512448 totalConstMem 65536 l2CacheSize 6291456 sharedMemPerBlock 49152 regsPerBlock 65536 warpSize 32 memPitch 2147483647 maxThreadsPerBlock 1024 maxThreadsPerMP 2048 multiProcessorCount 80 maxThreadsDim[3] 1024,1024,64 maxGridSize[3] 2147483647,65535,65535 textureAlignment 512 deviceOverlap 1 pciDeviceID 30 pciBusID 0 Thread bench, testing various thread sizes for ffts 32768K to 65536K, doing 50 passes. fft = 32768K, ave time = 5.7886 ms, square: 32, splice: 128 fft = 32768K, ave time = 5.8968 ms, square: 64, splice: 128 fft = 32768K, ave time = 5.8948 ms, square: 128, splice: 128 fft = 32768K, ave time = 5.8943 ms, square: 256, splice: 128 fft = 32768K, ave time = 5.9163 ms, square: 512, splice: 128 fft = 32768K, ave time = 5.9288 ms, square: 1024, splice: 128 fft = 32768K, ave time = 5.7883 ms, square: 32, splice: 32 fft = 32768K, ave time = 5.7886 ms, square: 32, splice: 64 fft = 32768K, ave time = 5.7891 ms, square: 32, splice: 128 fft = 32768K, ave time = 5.7890 ms, square: 32, splice: 256 fft = 32768K, ave time = 5.7885 ms, square: 32, splice: 512 fft = 32768K, ave time = 5.7876 ms, square: 32, splice: 1024 fft = 32768K, min time = 5.7876 ms, square: 32, splice: 1024 fft = 33614K, ave time = 7.4420 ms, square: 32, splice: 128 fft = 33614K, ave time = 7.5526 ms, square: 64, splice: 128 fft = 33614K, ave time = 7.5502 ms, square: 128, splice: 128 fft = 33614K, ave time = 7.5502 ms, square: 256, splice: 128 fft = 33614K, ave time = 7.5737 ms, square: 512, splice: 128 fft = 33614K, ave time = 7.4411 ms, square: 32, splice: 32 fft = 33614K, ave time = 7.4414 ms, square: 32, splice: 64 fft = 33614K, ave time = 7.4419 ms, square: 32, splice: 128 fft = 33614K, ave time = 7.4421 ms, square: 32, splice: 256 fft = 33614K, ave time = 7.4411 ms, square: 32, splice: 512 fft = 33614K, ave time = 7.4408 ms, square: 32, splice: 1024 fft = 33614K, min time = 7.4408 ms, square: 32, splice: 1024 fft = 34992K, ave time = 7.6177 ms, square: 32, splice: 128 fft = 34992K, ave time = 7.7323 ms, square: 64, splice: 128 fft = 34992K, ave time = 7.7303 ms, square: 128, splice: 128 fft = 34992K, ave time = 7.7299 ms, square: 256, splice: 128 fft = 34992K, ave time = 7.7540 ms, square: 512, splice: 128 fft = 34992K, ave time = 7.7653 ms, square: 1024, splice: 128 fft = 34992K, ave time = 7.6169 ms, square: 32, splice: 32 fft = 34992K, ave time = 7.6167 ms, square: 32, splice: 64 fft = 34992K, ave time = 7.6175 ms, square: 32, splice: 128 fft = 34992K, ave time = 7.6177 ms, square: 32, splice: 256 fft = 34992K, ave time = 7.6168 ms, square: 32, splice: 512 fft = 34992K, ave time = 7.6162 ms, square: 32, splice: 1024 fft = 34992K, min time = 7.6162 ms, square: 32, splice: 1024 fft = 36000K, ave time = 7.7604 ms, square: 32, splice: 128 fft = 36000K, ave time = 7.8794 ms, square: 64, splice: 128 fft = 36000K, ave time = 7.8774 ms, square: 128, splice: 128 fft = 36000K, ave time = 7.8768 ms, square: 256, splice: 128 fft = 36000K, ave time = 7.9011 ms, square: 512, splice: 128 fft = 36000K, ave time = 7.9124 ms, square: 1024, splice: 128 fft = 36000K, ave time = 7.7598 ms, square: 32, splice: 32 fft = 36000K, ave time = 7.7596 ms, square: 32, splice: 64 fft = 36000K, ave time = 7.7601 ms, square: 32, splice: 128 fft = 36000K, ave time = 7.7605 ms, square: 32, splice: 256 fft = 36000K, ave time = 7.7597 ms, square: 32, splice: 512 fft = 36000K, ave time = 7.7592 ms, square: 32, splice: 1024 fft = 36000K, min time = 7.7592 ms, square: 32, splice: 1024 fft = 36288K, ave time = 7.9010 ms, square: 32, splice: 128 fft = 36288K, ave time = 8.0198 ms, square: 64, splice: 128 fft = 36288K, ave time = 8.0181 ms, square: 128, splice: 128 fft = 36288K, ave time = 8.0176 ms, square: 256, splice: 128 fft = 36288K, ave time = 8.0421 ms, square: 512, splice: 128 fft = 36288K, ave time = 8.0548 ms, square: 1024, splice: 128 fft = 36288K, ave time = 7.9000 ms, square: 32, splice: 32 fft = 36288K, ave time = 7.8999 ms, square: 32, splice: 64 fft = 36288K, ave time = 7.9006 ms, square: 32, splice: 128 fft = 36288K, ave time = 7.9007 ms, square: 32, splice: 256 fft = 36288K, ave time = 7.8999 ms, square: 32, splice: 512 fft = 36288K, ave time = 7.8998 ms, square: 32, splice: 1024 fft = 36288K, min time = 7.8998 ms, square: 32, splice: 1024 fft = 38416K, ave time = 8.3214 ms, square: 32, splice: 128 fft = 38416K, ave time = 8.4482 ms, square: 64, splice: 128 fft = 38416K, ave time = 8.4458 ms, square: 128, splice: 128 fft = 38416K, ave time = 8.4453 ms, square: 256, splice: 128 fft = 38416K, ave time = 8.4704 ms, square: 512, splice: 128 fft = 38416K, ave time = 8.4839 ms, square: 1024, splice: 128 fft = 38416K, ave time = 8.3204 ms, square: 32, splice: 32 fft = 38416K, ave time = 8.3207 ms, square: 32, splice: 64 fft = 38416K, ave time = 8.3216 ms, square: 32, splice: 128 fft = 38416K, ave time = 8.3214 ms, square: 32, splice: 256 fft = 38416K, ave time = 8.3207 ms, square: 32, splice: 512 fft = 38416K, ave time = 8.3202 ms, square: 32, splice: 1024 fft = 38416K, min time = 8.3202 ms, square: 32, splice: 1024 fft = 39200K, ave time = 8.4447 ms, square: 32, splice: 128 fft = 39200K, ave time = 8.5748 ms, square: 64, splice: 128 fft = 39200K, ave time = 8.5723 ms, square: 128, splice: 128 fft = 39200K, ave time = 8.5711 ms, square: 256, splice: 128 fft = 39200K, ave time = 8.5969 ms, square: 512, splice: 128 fft = 39200K, ave time = 8.6098 ms, square: 1024, splice: 128 fft = 39200K, ave time = 8.4436 ms, square: 32, splice: 32 fft = 39200K, ave time = 8.4437 ms, square: 32, splice: 64 fft = 39200K, ave time = 8.4444 ms, square: 32, splice: 128 fft = 39200K, ave time = 8.4444 ms, square: 32, splice: 256 fft = 39200K, ave time = 8.4442 ms, square: 32, splice: 512 fft = 39200K, ave time = 8.4432 ms, square: 32, splice: 1024 fft = 39200K, min time = 8.4432 ms, square: 32, splice: 1024 fft = 39366K, ave time = 8.7095 ms, square: 32, splice: 128 fft = 39366K, ave time = 8.8378 ms, square: 64, splice: 128 fft = 39366K, ave time = 8.8354 ms, square: 128, splice: 128 fft = 39366K, ave time = 8.8354 ms, square: 256, splice: 128 fft = 39366K, ave time = 8.8614 ms, square: 512, splice: 128 fft = 39366K, ave time = 8.7076 ms, square: 32, splice: 32 fft = 39366K, ave time = 8.7083 ms, square: 32, splice: 64 fft = 39366K, ave time = 8.7093 ms, square: 32, splice: 128 fft = 39366K, ave time = 8.7091 ms, square: 32, splice: 256 fft = 39366K, ave time = 8.7086 ms, square: 32, splice: 512 fft = 39366K, ave time = 8.7077 ms, square: 32, splice: 1024 fft = 39366K, min time = 8.7076 ms, square: 32, splice: 32 fft = 40500K, ave time = 8.6271 ms, square: 32, splice: 128 fft = 40500K, ave time = 8.7603 ms, square: 64, splice: 128 fft = 40500K, ave time = 8.7581 ms, square: 128, splice: 128 fft = 40500K, ave time = 8.7577 ms, square: 256, splice: 128 fft = 40500K, ave time = 8.7850 ms, square: 512, splice: 128 fft = 40500K, ave time = 8.7981 ms, square: 1024, splice: 128 fft = 40500K, ave time = 8.6255 ms, square: 32, splice: 32 fft = 40500K, ave time = 8.6266 ms, square: 32, splice: 64 fft = 40500K, ave time = 8.6269 ms, square: 32, splice: 128 fft = 40500K, ave time = 8.6271 ms, square: 32, splice: 256 fft = 40500K, ave time = 8.6266 ms, square: 32, splice: 512 fft = 40500K, ave time = 8.6255 ms, square: 32, splice: 1024 fft = 40500K, min time = 8.6255 ms, square: 32, splice: 1024 fft = 41472K, ave time = 7.3125 ms, square: 32, splice: 128 fft = 41472K, ave time = 7.4489 ms, square: 64, splice: 128 fft = 41472K, ave time = 7.4473 ms, square: 128, splice: 128 fft = 41472K, ave time = 7.4449 ms, square: 256, splice: 128 fft = 41472K, ave time = 7.4721 ms, square: 512, splice: 128 fft = 41472K, ave time = 7.4872 ms, square: 1024, splice: 128 fft = 41472K, ave time = 7.3105 ms, square: 32, splice: 32 fft = 41472K, ave time = 7.3116 ms, square: 32, splice: 64 fft = 41472K, ave time = 7.3121 ms, square: 32, splice: 128 fft = 41472K, ave time = 7.3120 ms, square: 32, splice: 256 fft = 41472K, ave time = 7.3119 ms, square: 32, splice: 512 fft = 41472K, ave time = 7.3108 ms, square: 32, splice: 1024 fft = 41472K, min time = 7.3105 ms, square: 32, splice: 32 fft = 42336K, ave time = 9.0994 ms, square: 32, splice: 128 fft = 42336K, ave time = 9.2387 ms, square: 64, splice: 128 fft = 42336K, ave time = 9.2365 ms, square: 128, splice: 128 fft = 42336K, ave time = 9.2357 ms, square: 256, splice: 128 fft = 42336K, ave time = 9.2640 ms, square: 512, splice: 128 fft = 42336K, ave time = 9.2781 ms, square: 1024, splice: 128 fft = 42336K, ave time = 9.0976 ms, square: 32, splice: 32 fft = 42336K, ave time = 9.0986 ms, square: 32, splice: 64 fft = 42336K, ave time = 9.0989 ms, square: 32, splice: 128 fft = 42336K, ave time = 9.0992 ms, square: 32, splice: 256 fft = 42336K, ave time = 9.0987 ms, square: 32, splice: 512 fft = 42336K, ave time = 9.0980 ms, square: 32, splice: 1024 fft = 42336K, min time = 9.0976 ms, square: 32, splice: 32 fft = 43200K, ave time = 9.3958 ms, square: 32, splice: 128 fft = 43200K, ave time = 9.5374 ms, square: 64, splice: 128 fft = 43200K, ave time = 9.5353 ms, square: 128, splice: 128 fft = 43200K, ave time = 9.5346 ms, square: 256, splice: 128 fft = 43200K, ave time = 9.5628 ms, square: 512, splice: 128 fft = 43200K, ave time = 9.5768 ms, square: 1024, splice: 128 fft = 43200K, ave time = 9.3941 ms, square: 32, splice: 32 fft = 43200K, ave time = 9.3953 ms, square: 32, splice: 64 fft = 43200K, ave time = 9.3955 ms, square: 32, splice: 128 fft = 43200K, ave time = 9.3957 ms, square: 32, splice: 256 fft = 43200K, ave time = 9.3954 ms, square: 32, splice: 512 fft = 43200K, ave time = 9.3945 ms, square: 32, splice: 1024 fft = 43200K, min time = 9.3941 ms, square: 32, splice: 32 fft = 43904K, ave time = 7.8800 ms, square: 32, splice: 128 fft = 43904K, ave time = 8.0248 ms, square: 64, splice: 128 fft = 43904K, ave time = 8.0225 ms, square: 128, splice: 128 fft = 43904K, ave time = 8.0214 ms, square: 256, splice: 128 fft = 43904K, ave time = 8.0504 ms, square: 512, splice: 128 fft = 43904K, ave time = 8.0639 ms, square: 1024, splice: 128 fft = 43904K, ave time = 7.8785 ms, square: 32, splice: 32 fft = 43904K, ave time = 7.8797 ms, square: 32, splice: 64 fft = 43904K, ave time = 7.8802 ms, square: 32, splice: 128 fft = 43904K, ave time = 7.8807 ms, square: 32, splice: 256 fft = 43904K, ave time = 7.8798 ms, square: 32, splice: 512 fft = 43904K, ave time = 7.8795 ms, square: 32, splice: 1024 fft = 43904K, min time = 7.8785 ms, square: 32, splice: 32 fft = 47628K, ave time = 10.2253 ms, square: 32, splice: 128 fft = 47628K, ave time = 10.3819 ms, square: 64, splice: 128 fft = 47628K, ave time = 10.3792 ms, square: 128, splice: 128 fft = 47628K, ave time = 10.3778 ms, square: 256, splice: 128 fft = 47628K, ave time = 10.4080 ms, square: 512, splice: 128 fft = 47628K, ave time = 10.4244 ms, square: 1024, splice: 128 fft = 47628K, ave time = 10.2234 ms, square: 32, splice: 32 fft = 47628K, ave time = 10.2244 ms, square: 32, splice: 64 fft = 47628K, ave time = 10.2250 ms, square: 32, splice: 128 fft = 47628K, ave time = 10.2249 ms, square: 32, splice: 256 fft = 47628K, ave time = 10.2244 ms, square: 32, splice: 512 fft = 47628K, ave time = 10.2233 ms, square: 32, splice: 1024 fft = 47628K, min time = 10.2233 ms, square: 32, splice: 1024 fft = 49000K, ave time = 10.5086 ms, square: 32, splice: 128 fft = 49000K, ave time = 10.6699 ms, square: 64, splice: 128 fft = 49000K, ave time = 10.6672 ms, square: 128, splice: 128 fft = 49000K, ave time = 10.6662 ms, square: 256, splice: 128 fft = 49000K, ave time = 10.6979 ms, square: 512, splice: 128 fft = 49000K, ave time = 10.7141 ms, square: 1024, splice: 128 fft = 49000K, ave time = 10.5064 ms, square: 32, splice: 32 fft = 49000K, ave time = 10.5080 ms, square: 32, splice: 64 fft = 49000K, ave time = 10.5082 ms, square: 32, splice: 128 fft = 49000K, ave time = 10.5080 ms, square: 32, splice: 256 fft = 49000K, ave time = 10.5079 ms, square: 32, splice: 512 fft = 49000K, ave time = 10.5071 ms, square: 32, splice: 1024 fft = 49000K, min time = 10.5064 ms, square: 32, splice: 32 fft = 50000K, ave time = 10.7785 ms, square: 32, splice: 128 fft = 50000K, ave time = 10.9435 ms, square: 64, splice: 128 fft = 50000K, ave time = 10.9406 ms, square: 128, splice: 128 fft = 50000K, ave time = 10.9393 ms, square: 256, splice: 128 fft = 50000K, ave time = 10.9727 ms, square: 512, splice: 128 fft = 50000K, ave time = 10.9882 ms, square: 1024, splice: 128 fft = 50000K, ave time = 10.7767 ms, square: 32, splice: 32 fft = 50000K, ave time = 10.7778 ms, square: 32, splice: 64 fft = 50000K, ave time = 10.7782 ms, square: 32, splice: 128 fft = 50000K, ave time = 10.7783 ms, square: 32, splice: 256 fft = 50000K, ave time = 10.7779 ms, square: 32, splice: 512 fft = 50000K, ave time = 10.7770 ms, square: 32, splice: 1024 fft = 50000K, min time = 10.7767 ms, square: 32, splice: 32 fft = 50176K, ave time = 10.6853 ms, square: 32, splice: 128 fft = 50176K, ave time = 10.8504 ms, square: 64, splice: 128 fft = 50176K, ave time = 10.8483 ms, square: 128, splice: 128 fft = 50176K, ave time = 10.8464 ms, square: 256, splice: 128 fft = 50176K, ave time = 10.8793 ms, square: 512, splice: 128 fft = 50176K, ave time = 10.8949 ms, square: 1024, splice: 128 fft = 50176K, ave time = 10.6832 ms, square: 32, splice: 32 fft = 50176K, ave time = 10.6847 ms, square: 32, splice: 64 fft = 50176K, ave time = 10.6850 ms, square: 32, splice: 128 fft = 50176K, ave time = 10.6848 ms, square: 32, splice: 256 fft = 50176K, ave time = 10.6846 ms, square: 32, splice: 512 fft = 50176K, ave time = 10.6837 ms, square: 32, splice: 1024 fft = 50176K, min time = 10.6832 ms, square: 32, splice: 32 fft = 51200K, ave time = 11.1375 ms, square: 32, splice: 128 fft = 51200K, ave time = 11.3047 ms, square: 64, splice: 128 fft = 51200K, ave time = 11.3016 ms, square: 128, splice: 128 fft = 51200K, ave time = 11.3015 ms, square: 256, splice: 128 fft = 51200K, ave time = 11.3341 ms, square: 512, splice: 128 fft = 51200K, ave time = 11.3497 ms, square: 1024, splice: 128 fft = 51200K, ave time = 11.1355 ms, square: 32, splice: 32 fft = 51200K, ave time = 11.1369 ms, square: 32, splice: 64 fft = 51200K, ave time = 11.1369 ms, square: 32, splice: 128 fft = 51200K, ave time = 11.1371 ms, square: 32, splice: 256 fft = 51200K, ave time = 11.1367 ms, square: 32, splice: 512 fft = 51200K, ave time = 11.1361 ms, square: 32, splice: 1024 fft = 51200K, min time = 11.1355 ms, square: 32, splice: 32 fft = 52488K, ave time = 11.1839 ms, square: 32, splice: 128 fft = 52488K, ave time = 11.3575 ms, square: 64, splice: 128 fft = 52488K, ave time = 11.3543 ms, square: 128, splice: 128 fft = 52488K, ave time = 11.3530 ms, square: 256, splice: 128 fft = 52488K, ave time = 11.3874 ms, square: 512, splice: 128 fft = 52488K, ave time = 11.4035 ms, square: 1024, splice: 128 fft = 52488K, ave time = 11.1819 ms, square: 32, splice: 32 fft = 52488K, ave time = 11.1830 ms, square: 32, splice: 64 fft = 52488K, ave time = 11.1834 ms, square: 32, splice: 128 fft = 52488K, ave time = 11.1833 ms, square: 32, splice: 256 fft = 52488K, ave time = 11.1829 ms, square: 32, splice: 512 fft = 52488K, ave time = 11.1823 ms, square: 32, splice: 1024 fft = 52488K, min time = 11.1819 ms, square: 32, splice: 32 fft = 54432K, ave time = 11.9336 ms, square: 32, splice: 128 fft = 54432K, ave time = 12.1124 ms, square: 64, splice: 128 fft = 54432K, ave time = 12.1094 ms, square: 128, splice: 128 fft = 54432K, ave time = 12.1083 ms, square: 256, splice: 128 fft = 54432K, ave time = 12.1443 ms, square: 512, splice: 128 fft = 54432K, ave time = 12.1601 ms, square: 1024, splice: 128 fft = 54432K, ave time = 11.9316 ms, square: 32, splice: 32 fft = 54432K, ave time = 11.9330 ms, square: 32, splice: 64 fft = 54432K, ave time = 11.9333 ms, square: 32, splice: 128 fft = 54432K, ave time = 11.9329 ms, square: 32, splice: 256 fft = 54432K, ave time = 11.9331 ms, square: 32, splice: 512 fft = 54432K, ave time = 11.9319 ms, square: 32, splice: 1024 fft = 54432K, min time = 11.9316 ms, square: 32, splice: 32 fft = 55296K, ave time = 11.7796 ms, square: 32, splice: 128 fft = 55296K, ave time = 11.9614 ms, square: 64, splice: 128 fft = 55296K, ave time = 11.9583 ms, square: 128, splice: 128 fft = 55296K, ave time = 11.9575 ms, square: 256, splice: 128 fft = 55296K, ave time = 11.9937 ms, square: 512, splice: 128 fft = 55296K, ave time = 12.0108 ms, square: 1024, splice: 128 fft = 55296K, ave time = 11.7777 ms, square: 32, splice: 32 fft = 55296K, ave time = 11.7790 ms, square: 32, splice: 64 fft = 55296K, ave time = 11.7793 ms, square: 32, splice: 128 fft = 55296K, ave time = 11.7792 ms, square: 32, splice: 256 fft = 55296K, ave time = 11.7788 ms, square: 32, splice: 512 fft = 55296K, ave time = 11.7780 ms, square: 32, splice: 1024 fft = 55296K, min time = 11.7777 ms, square: 32, splice: 32 fft = 56000K, ave time = 12.4634 ms, square: 32, splice: 128 fft = 56000K, ave time = 12.6470 ms, square: 64, splice: 128 fft = 56000K, ave time = 12.6456 ms, square: 128, splice: 128 fft = 56000K, ave time = 12.6435 ms, square: 256, splice: 128 fft = 56000K, ave time = 12.6805 ms, square: 512, splice: 128 fft = 56000K, ave time = 12.6969 ms, square: 1024, splice: 128 fft = 56000K, ave time = 12.4613 ms, square: 32, splice: 32 fft = 56000K, ave time = 12.4625 ms, square: 32, splice: 64 fft = 56000K, ave time = 12.4630 ms, square: 32, splice: 128 fft = 56000K, ave time = 12.4629 ms, square: 32, splice: 256 fft = 56000K, ave time = 12.4622 ms, square: 32, splice: 512 fft = 56000K, ave time = 12.4616 ms, square: 32, splice: 1024 fft = 56000K, min time = 12.4613 ms, square: 32, splice: 32 fft = 57344K, ave time = 12.3683 ms, square: 32, splice: 128 fft = 57344K, ave time = 12.5556 ms, square: 64, splice: 128 fft = 57344K, ave time = 12.5542 ms, square: 128, splice: 128 fft = 57344K, ave time = 12.5519 ms, square: 256, splice: 128 fft = 57344K, ave time = 12.5875 ms, square: 512, splice: 128 fft = 57344K, ave time = 12.6059 ms, square: 1024, splice: 128 fft = 57344K, ave time = 12.3664 ms, square: 32, splice: 32 fft = 57344K, ave time = 12.3676 ms, square: 32, splice: 64 fft = 57344K, ave time = 12.3678 ms, square: 32, splice: 128 fft = 57344K, ave time = 12.3681 ms, square: 32, splice: 256 fft = 57344K, ave time = 12.3678 ms, square: 32, splice: 512 fft = 57344K, ave time = 12.3671 ms, square: 32, splice: 1024 fft = 57344K, min time = 12.3664 ms, square: 32, splice: 32 fft = 60750K, ave time = 13.4505 ms, square: 32, splice: 128 fft = 60750K, ave time = 13.6514 ms, square: 64, splice: 128 fft = 60750K, ave time = 13.6477 ms, square: 128, splice: 128 fft = 60750K, ave time = 13.6466 ms, square: 256, splice: 128 fft = 60750K, ave time = 13.6874 ms, square: 512, splice: 128 fft = 60750K, ave time = 13.4485 ms, square: 32, splice: 32 fft = 60750K, ave time = 13.4500 ms, square: 32, splice: 64 fft = 60750K, ave time = 13.4507 ms, square: 32, splice: 128 fft = 60750K, ave time = 13.4508 ms, square: 32, splice: 256 fft = 60750K, ave time = 13.4503 ms, square: 32, splice: 512 fft = 60750K, ave time = 13.4493 ms, square: 32, splice: 1024 fft = 60750K, min time = 13.4485 ms, square: 32, splice: 32 fft = 62500K, ave time = 13.5061 ms, square: 32, splice: 128 fft = 62500K, ave time = 13.7116 ms, square: 64, splice: 128 fft = 62500K, ave time = 13.7083 ms, square: 128, splice: 128 fft = 62500K, ave time = 13.7073 ms, square: 256, splice: 128 fft = 62500K, ave time = 13.7478 ms, square: 512, splice: 128 fft = 62500K, ave time = 13.7663 ms, square: 1024, splice: 128 fft = 62500K, ave time = 13.5041 ms, square: 32, splice: 32 fft = 62500K, ave time = 13.5053 ms, square: 32, splice: 64 fft = 62500K, ave time = 13.5055 ms, square: 32, splice: 128 fft = 62500K, ave time = 13.5060 ms, square: 32, splice: 256 fft = 62500K, ave time = 13.5054 ms, square: 32, splice: 512 fft = 62500K, ave time = 13.5051 ms, square: 32, splice: 1024 fft = 62500K, min time = 13.5041 ms, square: 32, splice: 32 fft = 64000K, ave time = 11.3400 ms, square: 32, splice: 128 fft = 64000K, ave time = 11.5500 ms, square: 64, splice: 128 fft = 64000K, ave time = 11.5463 ms, square: 128, splice: 128 fft = 64000K, ave time = 11.5476 ms, square: 256, splice: 128 fft = 64000K, ave time = 11.5898 ms, square: 512, splice: 128 fft = 64000K, ave time = 11.6131 ms, square: 1024, splice: 128 fft = 64000K, ave time = 11.3383 ms, square: 32, splice: 32 fft = 64000K, ave time = 11.3392 ms, square: 32, splice: 64 fft = 64000K, ave time = 11.3395 ms, square: 32, splice: 128 fft = 64000K, ave time = 11.3398 ms, square: 32, splice: 256 fft = 64000K, ave time = 11.3394 ms, square: 32, splice: 512 fft = 64000K, ave time = 11.3393 ms, square: 32, splice: 1024 fft = 64000K, min time = 11.3383 ms, square: 32, splice: 32 fft = 64800K, ave time = 13.9271 ms, square: 32, splice: 128 fft = 64800K, ave time = 14.1397 ms, square: 64, splice: 128 fft = 64800K, ave time = 14.1365 ms, square: 128, splice: 128 fft = 64800K, ave time = 14.1354 ms, square: 256, splice: 128 fft = 64800K, ave time = 14.1789 ms, square: 512, splice: 128 fft = 64800K, ave time = 14.1990 ms, square: 1024, splice: 128 fft = 64800K, ave time = 13.9252 ms, square: 32, splice: 32 fft = 64800K, ave time = 13.9269 ms, square: 32, splice: 64 fft = 64800K, ave time = 13.9267 ms, square: 32, splice: 128 fft = 64800K, ave time = 13.9271 ms, square: 32, splice: 256 fft = 64800K, ave time = 13.9271 ms, square: 32, splice: 512 fft = 64800K, ave time = 13.9266 ms, square: 32, splice: 1024 fft = 64800K, min time = 13.9252 ms, square: 32, splice: 32 fft = 65536K, ave time = 11.4963 ms, square: 32, splice: 128 fft = 65536K, ave time = 11.7123 ms, square: 64, splice: 128 fft = 65536K, ave time = 11.7089 ms, square: 128, splice: 128 fft = 65536K, ave time = 11.7070 ms, square: 256, splice: 128 fft = 65536K, ave time = 11.7482 ms, square: 512, splice: 128 fft = 65536K, ave time = 11.7686 ms, square: 1024, splice: 128 fft = 65536K, ave time = 11.4942 ms, square: 32, splice: 32 fft = 65536K, ave time = 11.4957 ms, square: 32, splice: 64 fft = 65536K, ave time = 11.4958 ms, square: 32, splice: 128 fft = 65536K, ave time = 11.4960 ms, square: 32, splice: 256 fft = 65536K, ave time = 11.4959 ms, square: 32, splice: 512 fft = 65536K, ave time = 11.4961 ms, square: 32, splice: 1024 fft = 65536K, min time = 11.4942 ms, square: 32, splice: 32 ubuntu@ip-172-31-21-38:/mnt-efs/z/cudalucas$