iLab Neuromorphic Robotics Toolkit  0.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
test-ImagePerformance.C
Go to the documentation of this file.
1 /*! @file
2  @author Randolph Voorhies
3  @copyright GNU Public License (GPL v3)
4  @section License
5  @verbatim
6  // ////////////////////////////////////////////////////////////////////////
7  // The iLab Neuromorphic Robotics Toolkit (NRT) //
8  // Copyright 2010-2012 by the University of Southern California (USC) //
9  // and the iLab at USC. //
10  // //
11  // iLab - University of Southern California //
12  // Hedco Neurosciences Building, Room HNB-10 //
13  // Los Angeles, Ca 90089-2520 - USA //
14  // //
15  // See http://ilab.usc.edu for information about this project. //
16  // ////////////////////////////////////////////////////////////////////////
17  // This file is part of The iLab Neuromorphic Robotics Toolkit. //
18  // //
19  // The iLab Neuromorphic Robotics Toolkit is free software: you can //
20  // redistribute it and/or modify it under the terms of the GNU General //
21  // Public License as published by the Free Software Foundation, either //
22  // version 3 of the License, or (at your option) any later version. //
23  // //
24  // The iLab Neuromorphic Robotics Toolkit is distributed in the hope //
25  // that it will be useful, but WITHOUT ANY WARRANTY; without even the //
26  // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR //
27  // PURPOSE. See the GNU General Public License for more details. //
28  // //
29  // You should have received a copy of the GNU General Public License //
30  // along with The iLab Neuromorphic Robotics Toolkit. If not, see //
31  // <http://www.gnu.org/licenses/>. //
32  // ////////////////////////////////////////////////////////////////////////
33  @endverbatim */
34 
35 
36 #include <nrt/Core/Image/Image.H>
38 #include <nrt/Eigen/Eigen.H>
39 #include <chrono>
40 
41 #include <xmmintrin.h> // for SSE
42 
43 using namespace nrt;
44 
45 
46 template<class T>
47 void testAddition(int const numTimingRuns)
48 {
49  NRT_INFO("Testing Type: " << demangledName<T>());
50 
51  typedef typename pixel_traits<T>::pod_type PodType;
52  typedef typename promote<T,T>::type PromoType;
53  typedef typename pixel_traits<PromoType>::pod_type PromoPodType;
54 
55  Image<T> im1(Dims<int32>(640,480), T(1));
56  Image<T> im2(Dims<int32>(640,480), T(2));
57  Image<PromoType> im3;
58 
59  im3 = Image<PromoType>(im1.dims(), ImageInitPolicy::Zeros);
60  {
61  auto startTime = std::chrono::steady_clock::now();
62  for(int i=0; i<numTimingRuns; ++i)
63  {
64  Image<T> im3(im1+im2);
65  }
66 
67  auto endTime = std::chrono::steady_clock::now();
68  auto totalTime = std::chrono::duration_cast<std::chrono::duration<double>>(endTime - startTime);
69  NRT_INFO(" Default Add:\t\t\t" << totalTime.count() << "s");
70  }
71 
72  im3 = Image<PromoType>(im1.dims(), ImageInitPolicy::Zeros);
73  {
74  auto startTime = std::chrono::steady_clock::now();
75  for(int i=0; i<numTimingRuns; ++i)
76  {
77  im3 = im1+im2;
78  }
79 
80  auto endTime = std::chrono::steady_clock::now();
81  auto totalTime = std::chrono::duration_cast<std::chrono::duration<double>>(endTime - startTime);
82  NRT_INFO(" Operator Overload Add:\t\t" << totalTime.count() << "s");
83  for(T pix : im3) for(PodType val : pix) if(val != 3) NRT_FATAL("val == " << val);
84  }
85 
86  im3 = Image<PromoType>(im1.dims(), ImageInitPolicy::Zeros);
87  {
88  auto startTime = std::chrono::steady_clock::now();
89  for(int i=0; i<numTimingRuns; ++i)
90  {
91  typename Image<T>::const_iterator im1It = im1.const_begin();
92  typename Image<T>::const_iterator im2It = im2.const_begin();
93  typename Image<T>::const_iterator im2End = im2.const_end();
94  typename Image<PromoType>::iterator im3It = im3.begin();
95 
96  while(im2It != im2End) *im3It++ = *im1It++ + *im2It++;
97  }
98  auto endTime = std::chrono::steady_clock::now();
99  auto totalTime = std::chrono::duration_cast<std::chrono::duration<double>>(endTime - startTime);
100  NRT_INFO(" Iterator Add:\t\t\t" << totalTime.count() << "s");
101  for(T pix : im3) for(PodType val : pix) if(val != 3) NRT_FATAL("val == " << val);
102  }
103 
104  im3 = Image<PromoType>(im1.dims(), ImageInitPolicy::Zeros);
105  {
106  auto startTime = std::chrono::steady_clock::now();
107  for(int i=0; i<numTimingRuns; ++i)
108  {
109  std::transform(im1.const_begin(), im1.const_end(), im2.const_begin(), im3.begin(), std::plus<PromoType>());
110  }
111  auto endTime = std::chrono::steady_clock::now();
112  auto totalTime = std::chrono::duration_cast<std::chrono::duration<double>>(endTime - startTime);
113  NRT_INFO(" Iterator transform:\t\t" << totalTime.count() << "s");
114  for(T pix : im3) for(PodType val : pix) if(val != 3) NRT_FATAL("val == " << val);
115  }
116 
117  im3 = Image<PromoType>(im1.dims(), ImageInitPolicy::Zeros);
118  {
119  auto startTime = std::chrono::steady_clock::now();
120  for(int i=0; i<numTimingRuns; ++i)
121  {
122  typename Image<T>::const_pod_iterator im1It = im1.const_pod_begin();
123  typename Image<T>::const_pod_iterator im2It = im2.const_pod_begin();
124  typename Image<T>::const_pod_iterator im2End = im2.const_pod_end();
125  typename Image<PromoType>::pod_iterator im3It = im3.pod_begin();
126 
127  while(im2It != im2End) *im3It++ = *im1It++ + *im2It++;
128  }
129  auto endTime = std::chrono::steady_clock::now();
130  auto totalTime = std::chrono::duration_cast<std::chrono::duration<double>>(endTime - startTime);
131  NRT_INFO(" Pointer Add:\t\t\t" << totalTime.count() << "s");
132  for(T pix : im3) for(PodType val : pix) if(val != 3) NRT_FATAL("val == " << val);
133  }
134 
135  im3 = Image<PromoType>(im1.dims(), ImageInitPolicy::Zeros);
136  {
137  auto startTime = std::chrono::steady_clock::now();
138  for(int i=0; i<numTimingRuns; ++i)
139  {
140  std::transform(im1.const_pod_begin(), im1.const_pod_end(), im2.const_pod_begin(),
141  im3.pod_begin(), std::plus<PromoPodType>());
142  }
143  auto endTime = std::chrono::steady_clock::now();
144  auto totalTime = std::chrono::duration_cast<std::chrono::duration<double>>(endTime - startTime);
145  NRT_INFO(" Pointer transform:\t\t" << totalTime.count() << "s");
146  for(T pix : im3) for(PodType val : pix) if(val != 3) NRT_FATAL("val == " << val);
147  }
148 
149  im3 = Image<PromoType>(im1.dims(), ImageInitPolicy::Zeros);
150  {
151  auto startTime = std::chrono::steady_clock::now();
152  for(int i=0; i<numTimingRuns; ++i)
153  {
154  Eigen::Map<Eigen::Matrix<PodType, Eigen::Dynamic, 1> const, Eigen::Aligned>
155  lhsMap(im1.const_pod_begin(), im1.size()*pixel_traits<T>::num_channels);
156 
157  Eigen::Map<Eigen::Matrix<PodType, Eigen::Dynamic, 1> const, Eigen::Aligned>
158  rhsMap(im2.const_pod_begin(), im2.size()*pixel_traits<T>::num_channels);
159 
160  Eigen::Map<Eigen::Matrix<PodType, Eigen::Dynamic, 1>, Eigen::Aligned>
161  retMap(im3.pod_begin(), im3.size()*pixel_traits<T>::num_channels);
162 
163  retMap = lhsMap+rhsMap;
164  }
165  auto endTime = std::chrono::steady_clock::now();
166  auto totalTime = std::chrono::duration_cast<std::chrono::duration<double>>(endTime - startTime);
167  NRT_INFO(" Eigen Add:\t\t\t" << totalTime.count() << "s");
168  for(T pix : im3) for(PodType val : pix) if(val != 3) NRT_FATAL("val == " << val);
169  }
170 
171  im3 = Image<PromoType>(im1.dims(), ImageInitPolicy::Zeros);
172  {
173  auto startTime = std::chrono::steady_clock::now();
174 
175  for(int i=0; i<numTimingRuns; ++i)
176  {
177  // note: this code assumes float for the pixels' POD type
178  __m128 const * src1 = reinterpret_cast<__m128 const *>(im1.const_pod_begin());
179  __m128 const * end1 = reinterpret_cast<__m128 const *>(im1.const_pod_end());
180  __m128 const * src2 = reinterpret_cast<__m128 const *>(im2.const_pod_begin());
181  float * dst = im3.pod_begin();
182 
183  // note: Image has 16-byte aligned memory; otherwise we would need to use _mm_loadu_ps128 to load the values and
184  // _mm_storeu_ps128 to store the result, see for example
185  // http://stackoverflow.com/questions/5118158/using-sse-to-speed-up-computation-store-load-and-alignment
186  while (src1 != end1)
187  {
188  __m128 sum = _mm_add_ps(*src1, *src2);
189  _mm_store_ps(dst, sum);
190  ++src1; ++src2; dst += 4;
191  }
192  }
193  auto endTime = std::chrono::steady_clock::now();
194  auto totalTime = std::chrono::duration_cast<std::chrono::duration<double>>(endTime - startTime);
195  NRT_INFO(" SSE2 Add:\t\t\t" << totalTime.count() << "s");
196  for(T pix : im3) for(PodType val : pix) if(val != 3) NRT_FATAL("val == " << val);
197  }
198 
199 }
200 
201 int main()
202 {
203  int const numTimingRuns = 500;
204 
205  testAddition<PixRGB<float>>(numTimingRuns);
206 
207 
208  //// Safe Access
209  //{
210  // Image<PixGray<byte>, SafeAccess> safeImage(640, 480, ImageInitPolicy::Zeros);
211  // auto startTime = std::chrono::steady_clock::now();
212 
213  // for(int tries=0; tries<100; ++tries)
214  // for(int y=0; y<safeImage.height(); ++y)
215  // for(int x=0; x<safeImage.width(); ++x)
216  // safeImage(x,y) = PixGray<byte>(x);
217 
218  // auto endTime = std::chrono::steady_clock::now();
219  // auto totalTime = std::chrono::duration_cast<std::chrono::duration<double>>(endTime - startTime);
220  // NRT_INFO("Safe Access: " << totalTime.count() << "s");
221  //}
222 
223  //// Fast Access
224  //{
225  // Image<PixGray<byte>> fastImage(640, 480, ImageInitPolicy::Zeros);
226  // auto startTime = std::chrono::steady_clock::now();
227 
228  // for(int tries=0; tries<100; ++tries)
229  // for(int y=0; y<fastImage.height(); ++y)
230  // for(int x=0; x<fastImage.width(); ++x)
231  // fastImage(x,y) = PixGray<byte>(x);
232 
233  // auto endTime = std::chrono::steady_clock::now();
234  // auto totalTime = std::chrono::duration_cast<std::chrono::duration<double>>(endTime - startTime);
235  // NRT_INFO("Fast Access: " << totalTime.count() << "s");
236  //}
237 
238  //// Unique Access
239  //{
240  // Image<PixGray<byte>> img(640, 480, ImageInitPolicy::Zeros);
241  // img.deepCopy();
242  // Image<PixGray<byte>, UniqueAccess> uniqueImage = img;
243  // auto startTime = std::chrono::steady_clock::now();
244 
245  // for(int tries=0; tries<100; ++tries)
246  // for(int y=0; y<uniqueImage.height(); ++y)
247  // for(int x=0; x<uniqueImage.width(); ++x)
248  // uniqueImage(x,y) = PixGray<byte>(x);
249 
250  // auto endTime = std::chrono::steady_clock::now();
251  // auto totalTime = std::chrono::duration_cast<std::chrono::duration<double>>(endTime - startTime);
252  // NRT_INFO("Unique Access: " << totalTime.count() << "s");
253  //}
254 
255  return 0;
256 }