iLab Neuromorphic Robotics Toolkit  0.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
tests/test-ImagePerformance.C
/*! @file
@author Randolph Voorhies
@copyright GNU Public License (GPL v3)
@section License
@verbatim
// ////////////////////////////////////////////////////////////////////////
// The iLab Neuromorphic Robotics Toolkit (NRT) //
// Copyright 2010-2012 by the University of Southern California (USC) //
// and the iLab at USC. //
// //
// iLab - University of Southern California //
// Hedco Neurosciences Building, Room HNB-10 //
// Los Angeles, Ca 90089-2520 - USA //
// //
// See http://ilab.usc.edu for information about this project. //
// ////////////////////////////////////////////////////////////////////////
// This file is part of The iLab Neuromorphic Robotics Toolkit. //
// //
// The iLab Neuromorphic Robotics Toolkit is free software: you can //
// redistribute it and/or modify it under the terms of the GNU General //
// Public License as published by the Free Software Foundation, either //
// version 3 of the License, or (at your option) any later version. //
// //
// The iLab Neuromorphic Robotics Toolkit is distributed in the hope //
// that it will be useful, but WITHOUT ANY WARRANTY; without even the //
// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR //
// PURPOSE. See the GNU General Public License for more details. //
// //
// You should have received a copy of the GNU General Public License //
// along with The iLab Neuromorphic Robotics Toolkit. If not, see //
// <http://www.gnu.org/licenses/>. //
// ////////////////////////////////////////////////////////////////////////
@endverbatim */
#include <chrono>
#include <xmmintrin.h> // for SSE
using namespace nrt;
template<class T>
void testAddition(int const numTimingRuns)
{
NRT_INFO("Testing Type: " << demangledName<T>());
typedef typename pixel_traits<T>::pod_type PodType;
typedef typename promote<T,T>::type PromoType;
typedef typename pixel_traits<PromoType>::pod_type PromoPodType;
Image<T> im1(Dims<int32>(640,480), T(1));
Image<T> im2(Dims<int32>(640,480), T(2));
im3 = Image<PromoType>(im1.dims(), ImageInitPolicy::Zeros);
{
auto startTime = std::chrono::steady_clock::now();
for(int i=0; i<numTimingRuns; ++i)
{
Image<T> im3(im1+im2);
}
auto totalTime = std::chrono::duration_cast<std::chrono::duration<double>>(endTime - startTime);
NRT_INFO(" Default Add:\t\t\t" << totalTime.count() << "s");
}
im3 = Image<PromoType>(im1.dims(), ImageInitPolicy::Zeros);
{
auto startTime = std::chrono::steady_clock::now();
for(int i=0; i<numTimingRuns; ++i)
{
im3 = im1+im2;
}
auto totalTime = std::chrono::duration_cast<std::chrono::duration<double>>(endTime - startTime);
NRT_INFO(" Operator Overload Add:\t\t" << totalTime.count() << "s");
for(T pix : im3) for(PodType val : pix) if(val != 3) NRT_FATAL("val == " << val);
}
im3 = Image<PromoType>(im1.dims(), ImageInitPolicy::Zeros);
{
auto startTime = std::chrono::steady_clock::now();
for(int i=0; i<numTimingRuns; ++i)
{
typename Image<T>::const_iterator im1It = im1.const_begin();
typename Image<T>::const_iterator im2It = im2.const_begin();
typename Image<T>::const_iterator im2End = im2.const_end();
typename Image<PromoType>::iterator im3It = im3.begin();
while(im2It != im2End) *im3It++ = *im1It++ + *im2It++;
}
auto totalTime = std::chrono::duration_cast<std::chrono::duration<double>>(endTime - startTime);
NRT_INFO(" Iterator Add:\t\t\t" << totalTime.count() << "s");
for(T pix : im3) for(PodType val : pix) if(val != 3) NRT_FATAL("val == " << val);
}
im3 = Image<PromoType>(im1.dims(), ImageInitPolicy::Zeros);
{
auto startTime = std::chrono::steady_clock::now();
for(int i=0; i<numTimingRuns; ++i)
{
std::transform(im1.const_begin(), im1.const_end(), im2.const_begin(), im3.begin(), std::plus<PromoType>());
}
auto totalTime = std::chrono::duration_cast<std::chrono::duration<double>>(endTime - startTime);
NRT_INFO(" Iterator transform:\t\t" << totalTime.count() << "s");
for(T pix : im3) for(PodType val : pix) if(val != 3) NRT_FATAL("val == " << val);
}
im3 = Image<PromoType>(im1.dims(), ImageInitPolicy::Zeros);
{
auto startTime = std::chrono::steady_clock::now();
for(int i=0; i<numTimingRuns; ++i)
{
typename Image<PromoType>::pod_iterator im3It = im3.pod_begin();
while(im2It != im2End) *im3It++ = *im1It++ + *im2It++;
}
auto totalTime = std::chrono::duration_cast<std::chrono::duration<double>>(endTime - startTime);
NRT_INFO(" Pointer Add:\t\t\t" << totalTime.count() << "s");
for(T pix : im3) for(PodType val : pix) if(val != 3) NRT_FATAL("val == " << val);
}
im3 = Image<PromoType>(im1.dims(), ImageInitPolicy::Zeros);
{
auto startTime = std::chrono::steady_clock::now();
for(int i=0; i<numTimingRuns; ++i)
{
im3.pod_begin(), std::plus<PromoPodType>());
}
auto totalTime = std::chrono::duration_cast<std::chrono::duration<double>>(endTime - startTime);
NRT_INFO(" Pointer transform:\t\t" << totalTime.count() << "s");
for(T pix : im3) for(PodType val : pix) if(val != 3) NRT_FATAL("val == " << val);
}
im3 = Image<PromoType>(im1.dims(), ImageInitPolicy::Zeros);
{
auto startTime = std::chrono::steady_clock::now();
for(int i=0; i<numTimingRuns; ++i)
{
Eigen::Map<Eigen::Matrix<PodType, Eigen::Dynamic, 1> const, Eigen::Aligned>
Eigen::Map<Eigen::Matrix<PodType, Eigen::Dynamic, 1> const, Eigen::Aligned>
Eigen::Map<Eigen::Matrix<PodType, Eigen::Dynamic, 1>, Eigen::Aligned>
retMap = lhsMap+rhsMap;
}
auto totalTime = std::chrono::duration_cast<std::chrono::duration<double>>(endTime - startTime);
NRT_INFO(" Eigen Add:\t\t\t" << totalTime.count() << "s");
for(T pix : im3) for(PodType val : pix) if(val != 3) NRT_FATAL("val == " << val);
}
im3 = Image<PromoType>(im1.dims(), ImageInitPolicy::Zeros);
{
auto startTime = std::chrono::steady_clock::now();
for(int i=0; i<numTimingRuns; ++i)
{
// note: this code assumes float for the pixels' POD type
__m128 const * src1 = reinterpret_cast<__m128 const *>(im1.const_pod_begin());
__m128 const * end1 = reinterpret_cast<__m128 const *>(im1.const_pod_end());
__m128 const * src2 = reinterpret_cast<__m128 const *>(im2.const_pod_begin());
float * dst = im3.pod_begin();
// note: Image has 16-byte aligned memory; otherwise we would need to use _mm_loadu_ps128 to load the values and
// _mm_storeu_ps128 to store the result, see for example
// http://stackoverflow.com/questions/5118158/using-sse-to-speed-up-computation-store-load-and-alignment
while (src1 != end1)
{
__m128 sum = _mm_add_ps(*src1, *src2);
_mm_store_ps(dst, sum);
++src1; ++src2; dst += 4;
}
}
auto totalTime = std::chrono::duration_cast<std::chrono::duration<double>>(endTime - startTime);
NRT_INFO(" SSE2 Add:\t\t\t" << totalTime.count() << "s");
for(T pix : im3) for(PodType val : pix) if(val != 3) NRT_FATAL("val == " << val);
}
}
//! Program entry point: run the image addition benchmark for RGB pixels with float components
/*! @return 0 once the benchmark has run. */
int main()
{
  // How many times each addition strategy is repeated while being timed.
  int const timingRunCount = 500;

  testAddition<PixRGB<float>>(timingRunCount);

  // Disabled pixel-access benchmarks (safe / fast / unique access policies), kept for reference only.
#if 0
  // Safe Access
  {
    Image<PixGray<byte>, SafeAccess> safeImage(640, 480, ImageInitPolicy::Zeros);
    auto startTime = std::chrono::steady_clock::now();
    for(int tries=0; tries<100; ++tries)
      for(int y=0; y<safeImage.height(); ++y)
        for(int x=0; x<safeImage.width(); ++x)
          safeImage(x,y) = PixGray<byte>(x);
    auto endTime = std::chrono::steady_clock::now();
    auto totalTime = std::chrono::duration_cast<std::chrono::duration<double>>(endTime - startTime);
    NRT_INFO("Safe Access: " << totalTime.count() << "s");
  }

  // Fast Access
  {
    Image<PixGray<byte>> fastImage(640, 480, ImageInitPolicy::Zeros);
    auto startTime = std::chrono::steady_clock::now();
    for(int tries=0; tries<100; ++tries)
      for(int y=0; y<fastImage.height(); ++y)
        for(int x=0; x<fastImage.width(); ++x)
          fastImage(x,y) = PixGray<byte>(x);
    auto endTime = std::chrono::steady_clock::now();
    auto totalTime = std::chrono::duration_cast<std::chrono::duration<double>>(endTime - startTime);
    NRT_INFO("Fast Access: " << totalTime.count() << "s");
  }

  // Unique Access
  {
    Image<PixGray<byte>> img(640, 480, ImageInitPolicy::Zeros);
    img.deepCopy();
    Image<PixGray<byte>, UniqueAccess> uniqueImage = img;
    auto startTime = std::chrono::steady_clock::now();
    for(int tries=0; tries<100; ++tries)
      for(int y=0; y<uniqueImage.height(); ++y)
        for(int x=0; x<uniqueImage.width(); ++x)
          uniqueImage(x,y) = PixGray<byte>(x);
    auto endTime = std::chrono::steady_clock::now();
    auto totalTime = std::chrono::duration_cast<std::chrono::duration<double>>(endTime - startTime);
    NRT_INFO("Unique Access: " << totalTime.count() << "s");
  }
#endif

  return 0;
}