00001 /*!@file Channels/MotionSpatioTemporalChannel.C */ 00002 00003 // //////////////////////////////////////////////////////////////////// // 00004 // The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2000-2005 // 00005 // by the University of Southern California (USC) and the iLab at USC. // 00006 // See http://iLab.usc.edu for information about this project. // 00007 // //////////////////////////////////////////////////////////////////// // 00008 // Major portions of the iLab Neuromorphic Vision Toolkit are protected // 00009 // under the U.S. patent ``Computation of Intrinsic Perceptual Saliency // 00010 // in Visual Environments, and Applications'' by Christof Koch and // 00011 // Laurent Itti, California Institute of Technology, 2001 (patent // 00012 // pending; application number 09/912,225 filed July 23, 2001; see // 00013 // http://pair.uspto.gov/cgi-bin/final/home.pl for current status). // 00014 // //////////////////////////////////////////////////////////////////// // 00015 // This file is part of the iLab Neuromorphic Vision C++ Toolkit. // 00016 // // 00017 // The iLab Neuromorphic Vision C++ Toolkit is free software; you can // 00018 // redistribute it and/or modify it under the terms of the GNU General // 00019 // Public License as published by the Free Software Foundation; either // 00020 // version 2 of the License, or (at your option) any later version. // 00021 // // 00022 // The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope // 00023 // that it will be useful, but WITHOUT ANY WARRANTY; without even the // 00024 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR // 00025 // PURPOSE. See the GNU General Public License for more details. // 00026 // // 00027 // You should have received a copy of the GNU General Public License // 00028 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write // 00029 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, // 00030 // Boston, MA 02111-1307 USA. // 00031 // //////////////////////////////////////////////////////////////////// // 00032 // 00033 // Primary maintainer for this file: Christian Siagian <siagian@usc.edu> 00034 // $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/Channels/MotionSpatioTemporalChannel.C $ 00035 // $Id: $ 00036 // 00037 00038 #ifndef MOTIONSPATIOTEMPORALCHANNEL_C_DEFINED 00039 #define MOTIONSPATIOTEMPORALCHANNEL_C_DEFINED 00040 00041 #include "Channels/MotionSpatioTemporalChannel.H" 00042 00043 #include "Channels/ChannelOpts.H" 00044 #include "Channels/DirectionSpatioTemporalChannel.H" 00045 #include "Component/OptionManager.H" 00046 #include "rutz/trace.h" 00047 #include "Util/Timer.H" 00048 #include "Image/ShapeOps.H" 00049 #include "Image/FilterOps.H" 00050 #include "Image/DrawOps.H" 00051 00052 #define SELF_MOTION_WEIGHT 1.0F 00053 #define OBJECT_MOTION_WEIGHT 1.0F 00054 #define MAX_FIRING_RATE 100.0F 00055 00056 // ###################################################################### 00057 // MotionSpatioTemporalChannel member definitions: 00058 // ###################################################################### 00059 00060 // ###################################################################### 00061 MotionSpatioTemporalChannel::MotionSpatioTemporalChannel(OptionManager& mgr) : 00062 ComplexChannel(mgr, 00063 "MotionSpatioTemporal", 00064 "motionSpatioTemporal", 00065 MOTIONSPATIOTEMPORAL), 00066 itsPyrType("MotionChannelPyramidType", this, Gaussian5), 00067 itsNumDirs(&OPT_NumSpatioTemporalDirections, this), // see Channels/ChannelOpts.{H,C} 00068 itsNumSpeeds(&OPT_NumSpatioTemporalSpeeds, this), // see Channels/ChannelOpts.{H,C} 00069 itsFoeDetector(new FoeDetector(mgr)) 00070 { 00071 GVX_TRACE(__PRETTY_FUNCTION__); 00072 00073 // let's create our subchannels (may be reconfigured later if our 00074 // number of directions changes): 00075 buildSubChans(); 00076 00077 itsMT.reset(new MiddleTemporal()); 00078 addSubComponent(itsFoeDetector); 00079 00080 itsCurrentFoeMapIndex = -1; 00081 itsWin.reset(); 00082 } 00083 00084 // ###################################################################### 00085 MotionSpatioTemporalChannel::~MotionSpatioTemporalChannel() 00086 { 00087 GVX_TRACE(__PRETTY_FUNCTION__); 00088 } 00089 00090 // ###################################################################### 00091 DirectionSpatioTemporalChannel& MotionSpatioTemporalChannel::dirChan 00092 (const uint idx) const 00093 { 00094 GVX_TRACE(__PRETTY_FUNCTION__); 00095 return *(dynCast<DirectionSpatioTemporalChannel>(subChan(idx))); 00096 } 00097 00098 // ###################################################################### 00099 void MotionSpatioTemporalChannel::buildSubChans() 00100 { 00101 GVX_TRACE(__PRETTY_FUNCTION__); 00102 // kill any subchans we may have had... 00103 this->removeAllSubChans(); 00104 00105 // let's instantiate our subchannels now that we know how many we 00106 // want. They will inherit the current values (typically 00107 // post-command-line parsing) of all their options as they are 00108 // constructed: 00109 LINFO("Using %d directions spanning [0..360]deg", itsNumDirs.getVal()); 00110 00111 // the various directional pyrbuilders 00112 itsDirectionSpatioTemporalChannels.clear(); 00113 itsDirectionSpatioTemporalChannels.resize(itsNumDirs.getVal()); 00114 00115 // go through the different directions and displacement/time 00116 for (uint i = 0; i < itsNumDirs.getVal(); i++) 00117 { 00118 for (uint j = 0; j < itsNumSpeeds.getVal(); j++) 00119 { 00120 float speed = pow(2.0, j); 00121 00122 nub::ref<DirectionSpatioTemporalChannel> chan = 00123 makeSharedComp(new DirectionSpatioTemporalChannel 00124 (getManager(), i, j, 00125 360.0 * double(i) / 00126 double(itsNumDirs.getVal()), 00127 speed, 00128 itsPyrType.getVal())); 00129 00130 itsDirectionSpatioTemporalChannels[i].push_back(chan); 00131 00132 this->addSubChan(chan); 00133 00134 chan->exportOptions(MC_RECURSE); 00135 } 00136 } 00137 00138 // Spatiotemporal features and MT features 00139 itsRawSpatioTemporalEnergy.clear(); 00140 itsRawSpatioTemporalEnergy.resize(itsNumDirs.getVal()); 00141 for(uint i = 0; i < itsNumDirs.getVal(); i++) 00142 itsRawSpatioTemporalEnergy[i].resize(itsNumSpeeds.getVal()); 00143 } 00144 00145 // ###################################################################### 00146 void MotionSpatioTemporalChannel::paramChanged(ModelParamBase* const param, 00147 const bool valueChanged, 00148 ParamClient::ChangeStatus* status) 00149 { 00150 GVX_TRACE(__PRETTY_FUNCTION__); 00151 ComplexChannel::paramChanged(param, valueChanged, status); 00152 00153 // if the param is our number of orientations and it has become 00154 // different from our number of channels, let's reconfigure: 00155 if (param == &itsNumDirs && 00156 numChans() != itsNumDirs.getVal()) 00157 buildSubChans(); 00158 } 00159 00160 // ###################################################################### 00161 void MotionSpatioTemporalChannel::doInput(const InputFrame& inframe) 00162 { 00163 GVX_TRACE(__PRETTY_FUNCTION__); 00164 ASSERT(inframe.grayFloat().initialized()); 00165 00166 Timer tim1(1000000); 00167 00168 Image<byte> image(inframe.grayFloat()); 00169 itsCurrentImage = image; 00170 00171 // compute spatiotemporal motion detection 00172 // into several directions and speeds 00173 for (uint i = 0; i < numChans(); i++) 00174 { 00175 subChan(i)->input(inframe); 00176 LINFO("Motion pyramid (%d/%d) ok.", i+1, numChans()); 00177 } 00178 00179 // get the spatiotemporal energy 00180 uint index = 0; 00181 for (uint i = 0; i < itsNumDirs.getVal(); i++) 00182 { 00183 for (uint j = 0; j < itsNumSpeeds.getVal(); j++) 00184 { 00185 itsRawSpatioTemporalEnergy[i][j] = 00186 itsDirectionSpatioTemporalChannels[i][j]->getSpatioTemporalEnergy(); 00187 //itsSpatioTemporalPyrBuilders[i][j]->getSpatioTemporalEnergy(); 00188 //dynamic_cast<DirectionSpatioTemporalChannel*>(subChan(index)) 00189 // ->getSpatioTemporalEnergy(); 00190 //subChan(index)->getSpatioTemporalEnergy(); 00191 index++; 00192 } 00193 } 00194 00195 LINFO(" time: %f \n", tim1.get()/1000.0); 00196 00197 // if there is enough frames to compute the spatiotemporal energy 00198 if(itsRawSpatioTemporalEnergy[0][0].size() != 0) 00199 { 00200 Timer tim2(1000000); 00201 00202 // compute the Middle Temporal features 00203 itsMT->computeMTfeatures(itsRawSpatioTemporalEnergy); 00204 std::vector<Image<float> > mtFeatures = itsMT->getMTfeatures(); 00205 for (uint i = 0; i < itsNumDirs.getVal(); i++) 00206 for (uint j = 0; j < itsNumSpeeds.getVal(); j++) 00207 itsDirectionSpatioTemporalChannels[i][j]->setMTfeatureMap(mtFeatures[i]); 00208 00209 LINFO("\n computeMTfeatures time: %f \n", tim2.get()/1000.0); 00210 00211 // compute motion conspicuity map 00212 computeConspicuityMap(); 00213 00214 LINFO("\n computeConspicuityMap time: %f \n", tim2.get()/1000.0); 00215 } 00216 } 00217 00218 // ###################################################################### 00219 void MotionSpatioTemporalChannel::computeConspicuityMap() 00220 { 00221 GVX_TRACE(__PRETTY_FUNCTION__); 00222 00223 // if the MT features has not been computed just return blank map 00224 std::vector<Image<float> > mtFeatures = itsMT->getMTfeatures(); 00225 uint mtWidth = mtFeatures[0].getWidth(); 00226 uint mtHeight = mtFeatures[0].getHeight(); 00227 if(mtWidth == 0 || mtHeight == 0) return; 00228 00229 uint cmWidth = subChan(0)->getMapDims().w(); 00230 uint cmHeight = subChan(0)->getMapDims().h(); 00231 00232 // go through all the V1 features 00233 // Max Normalize each direction&speed and combine 00234 //Image<float> result = getV1ObjectMotionMap(); 00235 Image<float> result = getMTObjectMotionMap(); 00236 00237 Image<float> tsubmap = maxNormalize(result, MAXNORMMIN, MAXNORMMAX, 00238 itsNormType.getVal()); 00239 result = tsubmap * numChans(); 00240 Image<float> tres = result; 00241 result = rescale(tres, Dims(mtWidth, mtHeight)); 00242 00243 // // May add a map that comes from higher level Motion areas 00244 // // : MST: FOE, planar motion 00245 // // : STS: Biological motion 00246 00247 // // NOTE: FOE_METHOD_TEMPLATE is fooled by Planar movement!!! 00248 // Image<float> foeMap = 00249 // itsFoeDetector->getFoeMap(itsMT->getMTfeatures(), 00250 // itsMT->getMToptimalShift(), 00251 // FOE_METHOD_TEMPLATE, false);// FOE_METHOD_AVERAGE); 00252 00253 // // if(itsWin.is_invalid()) 00254 // // itsWin.reset(new XWinManaged(Dims(mtWidth*4, mtHeight*4), 00255 // // 10, 0, "MotSpch: conspicuity map")); 00256 // // else itsWin->setDims(Dims(mtWidth*4, mtHeight*4)); 00257 // // itsWin->drawImage(zoomXY(foeMap,4),0,0); Raster::waitForKey(); 00258 00259 // // crazy normalizer 00260 // float mn,mx; getMinMax(foeMap,mn,mx); 00261 // inplaceNormalize(foeMap, 0.0F, 1.0F); 00262 // foeMap = toPower(foeMap, 40.0F); 00263 // foeMap *= mx; 00264 00265 // // weight the firing rate to the maximum possible firing rate 00266 // foeMap *= (SELF_MOTION_WEIGHT * MAX_FIRING_RATE * numChans()); 00267 00268 //getMinMax(result,mn,mx); 00269 //LINFO("FINAL MSTv : %f %f",mn,mx); 00270 00271 // getMinMax(foeMap,mn,mx); 00272 // LINFO("FINAL MSTd : %f %f",mn,mx); 00273 00274 00275 // itsWin->drawImage(zoomXY(foeMap,4),0,0); Raster::waitForKey(); 00276 // itsWin->drawImage(zoomXY(result,4),0,0); Raster::waitForKey(); 00277 00278 LINFO("TOOK OUT THE FOE MAP\n\n\n"); 00279 //result += foeMap; 00280 00281 // itsWin->drawImage(zoomXY(result,4),0,0); Raster::waitForKey(); 00282 00283 // resize submap to fixed scale if necessary: 00284 float mn, mx; 00285 getMinMax(result,mn,mx); 00286 if (mtWidth > cmWidth) 00287 result = downSize(result, Dims(cmWidth, cmHeight)); 00288 else if (mtWidth < cmWidth) 00289 result = rescale(result, Dims(cmWidth, cmHeight)); 00290 inplaceNormalize(result,0.0F,mx); 00291 00292 itsConspicuityMap = result; 00293 } 00294 00295 // ###################################################################### 00296 Image<float> MotionSpatioTemporalChannel::getV1ObjectMotionMap() 00297 { 00298 uint cmWidth = subChan(0)->getMapDims().w(); 00299 uint cmHeight = subChan(0)->getMapDims().h(); 00300 Image<float> result(cmWidth, cmHeight, ZEROS); 00301 00302 for (uint i = 0; i < itsNumDirs.getVal(); i++) 00303 { 00304 for (uint j = 0; j < itsNumSpeeds.getVal(); j++) 00305 { 00306 for (uint k = 0; k < itsRawSpatioTemporalEnergy[i][j].size(); k++) 00307 { 00308 Image<float> tmap = itsRawSpatioTemporalEnergy[i][j][k]; 00309 Image<float> submap = downSizeClean(tmap, Dims(cmWidth, cmHeight)); 00310 00311 Image<float> psubmap; 00312 if (itsUseOlderVersion.getVal()) 00313 { 00314 LDEBUG("%s[%d]: applying %s(%f .. %f)", 00315 tagName().c_str(), i, 00316 maxNormTypeName(itsNormType.getVal()), MAXNORMMIN, MAXNORMMAX); 00317 psubmap = maxNormalize(submap, MAXNORMMIN, MAXNORMMAX, 00318 itsNormType.getVal()); 00319 } 00320 else 00321 { 00322 LDEBUG("%s[%d]: applying %s(0.0 .. 0.0)", tagName().c_str(), i, 00323 maxNormTypeName(itsNormType.getVal())); 00324 psubmap = maxNormalize(submap, 0.0f, 0.0f, itsNormType.getVal()); 00325 } 00326 00327 result += psubmap; 00328 00329 00330 00331 00332 // uint scale = pow(2.0, k); 00333 00334 // LINFO("mt[%d][%d][%d]", i,j,k); 00335 // if(itsWin.is_invalid()) 00336 // itsWin.reset(new XWinManaged(Dims(mtWidth*8, mtHeight*8), 00337 // 10, 0, "MotSpch: conspicuity map")); 00338 // Image<float> disp(Dims(8*mtWidth,8*mtHeight), NO_INIT); 00339 // //itsWin->setDims(Dims(8*mtWidth,4*mtHeight)); 00340 00341 00342 // Image<float> dtmap = zoomXY(tmap, scale); 00343 // inplaceNormalize(dtmap, 0.0F, 255.0F); 00344 // inplacePaste(disp, dtmap, Point2D<int>(0,0)); 00345 00346 00347 // LINFO("mn"); 00348 // Image<float> dsmap = zoomXY(submap,16); //16 00349 // inplaceNormalize(dsmap, 0.0F, 255.0F); 00350 // inplacePaste(disp, dsmap, Point2D<int>(4*mtWidth, 0)); 00351 00352 // LINFO("mn2"); 00353 // Image<float> dpsmap = zoomXY(psubmap, 16); 00354 // //Image<float> dpsmap = zoomXY(psubmap, 16); 00355 // inplaceNormalize(dpsmap, 0.0F, 255.0F); 00356 // inplacePaste(disp, dpsmap, Point2D<int>(0, 4*mtHeight)); 00357 00358 // LINFO("mn3"); 00359 // Image<float> dres = zoomXY(result, 16); 00360 // inplaceNormalize(dres, 0.0F, 255.0F); 00361 // inplacePaste(disp, dres, Point2D<int>(4*mtWidth, 4*mtHeight)); 00362 00363 // //itsWin->drawImage(rescale(result, Dims(4*mtWidth, 4*mtHeight)),0,0); 00364 // itsWin->drawImage(disp,0,0); 00365 // Raster::waitForKey(); 00366 00367 } 00368 } 00369 } 00370 00371 return result; 00372 } 00373 00374 // ###################################################################### 00375 Image<float> MotionSpatioTemporalChannel::getMTObjectMotionMap() 00376 { 00377 std::vector<Image<float> > mtFeatures = itsMT->getMTfeatures(); 00378 uint mtWidth = mtFeatures[0].getWidth(); 00379 uint mtHeight = mtFeatures[0].getHeight(); 00380 if(mtWidth == 0 || mtHeight == 0) return Image<float>(); 00381 00382 uint cmWidth = subChan(0)->getMapDims().w(); 00383 uint cmHeight = subChan(0)->getMapDims().h(); 00384 //Image<float> result(cmWidth, cmHeight, ZEROS); 00385 Image<float> result(mtWidth, mtHeight, ZEROS); 00386 00387 LINFO("MT: %d %d CM: %d %d", mtWidth,mtHeight, cmWidth, cmHeight); 00388 00389 00390 00391 // Image<float> img = itsCurrentImage; 00392 // uint imgWidth = img.getWidth(); 00393 // uint imgHeight = img.getHeight(); 00394 // Image<float> disp2(Dims(imgWidth, imgHeight), NO_INIT); 00395 // if(itsWin.is_invalid()) 00396 // itsWin.reset(new XWinManaged(Dims(imgWidth, imgHeight), 00397 // 10, 0, "MotSpch: MT conspicuity map")); 00398 00399 00400 00401 for (uint i = 0; i < mtFeatures.size(); i++) 00402 { 00403 Image<float> tmap = mtFeatures[i]; 00404 00405 00406 00407 00408 // Image<float> im = img; 00409 // inplaceNormalize(im, 0.0F, 255.0F); 00410 // Image<float> mt = zoomXY(tmap, 4); 00411 // inplaceNormalize(mt, 0.0F, 255.0F); 00412 // disp2 = im + mt; 00413 // itsWin->drawImage(disp2,0,0); 00414 // Raster::waitForKey(); 00415 00416 00417 00418 00419 00420 00421 00422 Image<float> submap = tmap; //downSizeClean(tmap, Dims(cmWidth, cmHeight)); 00423 00424 Image<float> psubmap; 00425 // if (itsUseOlderVersion.getVal()) 00426 // { 00427 // LDEBUG("%s[%d]: applying %s(%f .. %f)", 00428 // tagName().c_str(), i, 00429 // maxNormTypeName(itsNormType.getVal()), MAXNORMMIN, MAXNORMMAX); 00430 // psubmap = maxNormalize(submap, MAXNORMMIN, MAXNORMMAX, 00431 // itsNormType.getVal()); 00432 // } 00433 // else 00434 // { 00435 // LDEBUG("%s[%d]: applying %s(0.0 .. 0.0)", tagName().c_str(), i, 00436 // maxNormTypeName(itsNormType.getVal())); 00437 // psubmap = maxNormalize(submap, 0.0f, 0.0f, itsNormType.getVal()); 00438 // } 00439 psubmap = submap; 00440 00441 result += psubmap; 00442 00443 //uint scale = 1; //4 for CM 00444 // LINFO("mt[%d]", i); 00445 // if(itsWin.is_invalid()) 00446 // itsWin.reset(new XWinManaged(Dims(mtWidth*8, mtHeight*8), 00447 // 10, 0, "MotSpch: MT conspicuity map")); 00448 // Image<float> disp(Dims(8*mtWidth,8*mtHeight), NO_INIT); 00449 // itsWin->setDims(Dims(8*mtWidth,8*mtHeight)); 00450 00451 // Image<float> dtmap = zoomXY(tmap, 4); 00452 // inplaceNormalize(dtmap, 0.0F, 255.0F); 00453 // inplacePaste(disp, dtmap, Point2D<int>(0,0)); 00454 00455 // LINFO("mn"); 00456 // Image<float> dsmap = zoomXY(submap,4); //16 00457 // inplaceNormalize(dsmap, 0.0F, 255.0F); 00458 // inplacePaste(disp, dsmap, Point2D<int>(4*mtWidth, 0)); 00459 00460 // LINFO("mn2"); 00461 // Image<float> dpsmap = zoomXY(psubmap, 4); 00462 // inplaceNormalize(dpsmap, 0.0F, 255.0F); 00463 // inplacePaste(disp, dpsmap, Point2D<int>(0, 4*mtHeight)); 00464 00465 // LINFO("mn3"); 00466 // Image<float> dres = zoomXY(result, 4); 00467 // inplaceNormalize(dres, 0.0F, 255.0F); 00468 // inplacePaste(disp, dres, Point2D<int>(4*mtWidth, 4*mtHeight)); 00469 00470 // itsWin->drawImage(disp,0,0); 00471 // Raster::waitForKey(); 00472 } 00473 00474 00475 result = downSizeClean(result, Dims(cmWidth, cmHeight)); 00476 return result; 00477 } 00478 00479 // ###################################################################### 00480 Image<float> MotionSpatioTemporalChannel::getOutput() 00481 { 00482 GVX_TRACE(__PRETTY_FUNCTION__); 00483 return itsConspicuityMap; 00484 } 00485 00486 // ###################################################################### 00487 Image<float> MotionSpatioTemporalChannel:: 00488 downSizeMax(Image<float> img, uint scale) 00489 { 00490 img = lowPassX(9,img); 00491 img = lowPassY(9,img); 00492 00493 uint width = img.getWidth(); 00494 uint height = img.getHeight(); 00495 00496 uint oWidth = width/scale; 00497 uint oHeight = height/scale; 00498 00499 Image<float> result(oWidth, oHeight, NO_INIT); 00500 for(uint i = 0; i < oWidth; i++) 00501 for(uint j = 0; j < oHeight; j++) 00502 { 00503 float max = 0.0; 00504 for(uint di = 0; di < scale; di++) 00505 for(uint dj = 0; dj < scale; dj++) 00506 { 00507 uint ci = i*scale + di; 00508 uint cj = j*scale + dj; 00509 float val = img.getVal(ci,cj); 00510 if(val > max) max = val; 00511 } 00512 result.setVal(i,j,max); 00513 } 00514 00515 return result; 00516 } 00517 00518 // ###################################################################### 00519 /* So things look consistent in everyone's emacs... */ 00520 /* Local Variables: */ 00521 /* indent-tabs-mode: nil */ 00522 /* End: */ 00523 00524 #endif // MOTION_SPATIOTEMPORALENERGY_CHANNEL_C_DEFINED