btQuickprof.cpp

Go to the documentation of this file.
00001 /*
00002 
00003 ***************************************************************************************************
00004 **
00005 ** profile.cpp
00006 **
00007 ** Real-Time Hierarchical Profiling for Game Programming Gems 3
00008 **
00009 ** by Greg Hjelstrom & Byon Garrabrant
00010 **
00011 ***************************************************************************************************/
00012 
00013 // Credits: The Clock class was inspired by the Timer classes in 
00014 // Ogre (www.ogre3d.org).
00015 
00016 #include "btQuickprof.h"
00017 
00018 #ifndef BT_NO_PROFILE
00019 
00020 
00021 static btClock gProfileClock;
00022 
00023 
00024 #ifdef __CELLOS_LV2__
00025 #include <sys/sys_time.h>
00026 #include <sys/time_util.h>
00027 #include <stdio.h>
00028 #endif
00029 
00030 #if defined (SUNOS) || defined (__SUNOS__) 
00031 #include <stdio.h> 
00032 #endif
00033 
00034 #if defined(WIN32) || defined(_WIN32)
00035 
00036 #define BT_USE_WINDOWS_TIMERS
00037 #define WIN32_LEAN_AND_MEAN
00038 #define NOWINRES
00039 #define NOMCX
00040 #define NOIME 
00041 
00042 #ifdef _XBOX
00043         #include <Xtl.h>
00044 #else //_XBOX
00045         #include <windows.h>
00046 #endif //_XBOX
00047 
00048 #include <time.h>
00049 
00050 
00051 #else //_WIN32
00052 #include <sys/time.h>
00053 #endif //_WIN32
00054 
00055 #define mymin(a,b) (a > b ? a : b)
00056 
00057 struct btClockData
00058 {
00059 
00060 #ifdef BT_USE_WINDOWS_TIMERS
00061         LARGE_INTEGER mClockFrequency;
00062         DWORD mStartTick;
00063         LONGLONG mPrevElapsedTime;
00064         LARGE_INTEGER mStartTime;
00065 #else
00066 #ifdef __CELLOS_LV2__
00067         uint64_t        mStartTime;
00068 #else
00069         struct timeval mStartTime;
00070 #endif
00071 #endif //__CELLOS_LV2__
00072 
00073 };
00074 
00076 btClock::btClock()
00077 {
00078         m_data = new btClockData;
00079 #ifdef BT_USE_WINDOWS_TIMERS
00080         QueryPerformanceFrequency(&m_data->mClockFrequency);
00081 #endif
00082         reset();
00083 }
00084 
00085 btClock::~btClock()
00086 {
00087         delete m_data;
00088 }
00089 
00090 btClock::btClock(const btClock& other)
00091 {
00092         m_data = new btClockData;
00093         *m_data = *other.m_data;
00094 }
00095 
00096 btClock& btClock::operator=(const btClock& other)
00097 {
00098         *m_data = *other.m_data;
00099         return *this;
00100 }
00101 
00102 
00104 void btClock::reset()
00105 {
00106 #ifdef BT_USE_WINDOWS_TIMERS
00107         QueryPerformanceCounter(&m_data->mStartTime);
00108         m_data->mStartTick = GetTickCount();
00109         m_data->mPrevElapsedTime = 0;
00110 #else
00111 #ifdef __CELLOS_LV2__
00112 
00113         typedef uint64_t  ClockSize;
00114         ClockSize newTime;
00115         //__asm __volatile__( "mftb %0" : "=r" (newTime) : : "memory");
00116         SYS_TIMEBASE_GET( newTime );
00117         m_data->mStartTime = newTime;
00118 #else
00119         gettimeofday(&m_data->mStartTime, 0);
00120 #endif
00121 #endif
00122 }
00123 
00126 unsigned long int btClock::getTimeMilliseconds()
00127 {
00128 #ifdef BT_USE_WINDOWS_TIMERS
00129         LARGE_INTEGER currentTime;
00130         QueryPerformanceCounter(&currentTime);
00131         LONGLONG elapsedTime = currentTime.QuadPart - 
00132                 m_data->mStartTime.QuadPart;
00133                 // Compute the number of millisecond ticks elapsed.
00134         unsigned long msecTicks = (unsigned long)(1000 * elapsedTime / 
00135                 m_data->mClockFrequency.QuadPart);
00136                 // Check for unexpected leaps in the Win32 performance counter.  
00137         // (This is caused by unexpected data across the PCI to ISA 
00138                 // bridge, aka south bridge.  See Microsoft KB274323.)
00139                 unsigned long elapsedTicks = GetTickCount() - m_data->mStartTick;
00140                 signed long msecOff = (signed long)(msecTicks - elapsedTicks);
00141                 if (msecOff < -100 || msecOff > 100)
00142                 {
00143                         // Adjust the starting time forwards.
00144                         LONGLONG msecAdjustment = mymin(msecOff * 
00145                                 m_data->mClockFrequency.QuadPart / 1000, elapsedTime - 
00146                                 m_data->mPrevElapsedTime);
00147                         m_data->mStartTime.QuadPart += msecAdjustment;
00148                         elapsedTime -= msecAdjustment;
00149 
00150                         // Recompute the number of millisecond ticks elapsed.
00151                         msecTicks = (unsigned long)(1000 * elapsedTime / 
00152                                 m_data->mClockFrequency.QuadPart);
00153                 }
00154 
00155                 // Store the current elapsed time for adjustments next time.
00156                 m_data->mPrevElapsedTime = elapsedTime;
00157 
00158                 return msecTicks;
00159 #else
00160 
00161 #ifdef __CELLOS_LV2__
00162                 uint64_t freq=sys_time_get_timebase_frequency();
00163                 double dFreq=((double) freq) / 1000.0;
00164                 typedef uint64_t  ClockSize;
00165                 ClockSize newTime;
00166                 SYS_TIMEBASE_GET( newTime );
00167                 //__asm __volatile__( "mftb %0" : "=r" (newTime) : : "memory");
00168 
00169                 return (unsigned long int)((double(newTime-m_data->mStartTime)) / dFreq);
00170 #else
00171 
00172                 struct timeval currentTime;
00173                 gettimeofday(&currentTime, 0);
00174                 return (currentTime.tv_sec - m_data->mStartTime.tv_sec) * 1000 + 
00175                         (currentTime.tv_usec - m_data->mStartTime.tv_usec) / 1000;
00176 #endif //__CELLOS_LV2__
00177 #endif
00178 }
00179 
00182 unsigned long int btClock::getTimeMicroseconds()
00183 {
00184 #ifdef BT_USE_WINDOWS_TIMERS
00185                 LARGE_INTEGER currentTime;
00186                 QueryPerformanceCounter(&currentTime);
00187                 LONGLONG elapsedTime = currentTime.QuadPart - 
00188                         m_data->mStartTime.QuadPart;
00189 
00190                 // Compute the number of millisecond ticks elapsed.
00191                 unsigned long msecTicks = (unsigned long)(1000 * elapsedTime / 
00192                         m_data->mClockFrequency.QuadPart);
00193 
00194                 // Check for unexpected leaps in the Win32 performance counter.  
00195                 // (This is caused by unexpected data across the PCI to ISA 
00196                 // bridge, aka south bridge.  See Microsoft KB274323.)
00197                 unsigned long elapsedTicks = GetTickCount() - m_data->mStartTick;
00198                 signed long msecOff = (signed long)(msecTicks - elapsedTicks);
00199                 if (msecOff < -100 || msecOff > 100)
00200                 {
00201                         // Adjust the starting time forwards.
00202                         LONGLONG msecAdjustment = mymin(msecOff * 
00203                                 m_data->mClockFrequency.QuadPart / 1000, elapsedTime - 
00204                                 m_data->mPrevElapsedTime);
00205                         m_data->mStartTime.QuadPart += msecAdjustment;
00206                         elapsedTime -= msecAdjustment;
00207                 }
00208 
00209                 // Store the current elapsed time for adjustments next time.
00210                 m_data->mPrevElapsedTime = elapsedTime;
00211 
00212                 // Convert to microseconds.
00213                 unsigned long usecTicks = (unsigned long)(1000000 * elapsedTime / 
00214                         m_data->mClockFrequency.QuadPart);
00215 
00216                 return usecTicks;
00217 #else
00218 
00219 #ifdef __CELLOS_LV2__
00220                 uint64_t freq=sys_time_get_timebase_frequency();
00221                 double dFreq=((double) freq)/ 1000000.0;
00222                 typedef uint64_t  ClockSize;
00223                 ClockSize newTime;
00224                 //__asm __volatile__( "mftb %0" : "=r" (newTime) : : "memory");
00225                 SYS_TIMEBASE_GET( newTime );
00226 
00227                 return (unsigned long int)((double(newTime-m_data->mStartTime)) / dFreq);
00228 #else
00229 
00230                 struct timeval currentTime;
00231                 gettimeofday(&currentTime, 0);
00232                 return (currentTime.tv_sec - m_data->mStartTime.tv_sec) * 1000000 + 
00233                         (currentTime.tv_usec - m_data->mStartTime.tv_usec);
00234 #endif//__CELLOS_LV2__
00235 #endif 
00236 }
00237 
00238 
00239 
00240 
00241 
00242 inline void Profile_Get_Ticks(unsigned long int * ticks)
00243 {
00244         *ticks = gProfileClock.getTimeMicroseconds();
00245 }
00246 
00247 inline float Profile_Get_Tick_Rate(void)
00248 {
00249 //      return 1000000.f;
00250         return 1000.f;
00251 
00252 }
00253 
00254 
00255 
00256 /***************************************************************************************************
00257 **
00258 ** CProfileNode
00259 **
00260 ***************************************************************************************************/
00261 
00262 /***********************************************************************************************
00263  * INPUT:                                                                                      *
00264  * name - pointer to a static string which is the name of this profile node                    *
00265  * parent - parent pointer                                                                     *
00266  *                                                                                             *
00267  * WARNINGS:                                                                                   *
00268  * The name is assumed to be a static pointer, only the pointer is stored and compared for     *
00269  * efficiency reasons.                                                                         *
00270  *=============================================================================================*/
00271 CProfileNode::CProfileNode( const char * name, CProfileNode * parent ) :
00272         Name( name ),
00273         TotalCalls( 0 ),
00274         TotalTime( 0 ),
00275         StartTime( 0 ),
00276         RecursionCounter( 0 ),
00277         Parent( parent ),
00278         Child( NULL ),
00279         Sibling( NULL ),
00280         m_userPtr(0)
00281 {
00282         Reset();
00283 }
00284 
00285 
00286 void    CProfileNode::CleanupMemory()
00287 {
00288         delete ( Child);
00289         Child = NULL;
00290         delete ( Sibling);
00291         Sibling = NULL;
00292 }
00293 
00294 CProfileNode::~CProfileNode( void )
00295 {
00296         delete ( Child);
00297         delete ( Sibling);
00298 }
00299 
00300 
00301 /***********************************************************************************************
00302  * INPUT:                                                                                      *
00303  * name - static string pointer to the name of the node we are searching for                   *
00304  *                                                                                             *
00305  * WARNINGS:                                                                                   *
00306  * All profile names are assumed to be static strings so this function uses pointer compares   *
00307  * to find the named node.                                                                     *
00308  *=============================================================================================*/
00309 CProfileNode * CProfileNode::Get_Sub_Node( const char * name )
00310 {
00311         // Try to find this sub node
00312         CProfileNode * child = Child;
00313         while ( child ) {
00314                 if ( child->Name == name ) {
00315                         return child;
00316                 }
00317                 child = child->Sibling;
00318         }
00319 
00320         // We didn't find it, so add it
00321         
00322         CProfileNode * node = new CProfileNode( name, this );
00323         node->Sibling = Child;
00324         Child = node;
00325         return node;
00326 }
00327 
00328 
00329 void    CProfileNode::Reset( void )
00330 {
00331         TotalCalls = 0;
00332         TotalTime = 0.0f;
00333         
00334 
00335         if ( Child ) {
00336                 Child->Reset();
00337         }
00338         if ( Sibling ) {
00339                 Sibling->Reset();
00340         }
00341 }
00342 
00343 
00344 void    CProfileNode::Call( void )
00345 {
00346         TotalCalls++;
00347         if (RecursionCounter++ == 0) {
00348                 Profile_Get_Ticks(&StartTime);
00349         }
00350 }
00351 
00352 
00353 bool    CProfileNode::Return( void )
00354 {
00355         if ( --RecursionCounter == 0 && TotalCalls != 0 ) { 
00356                 unsigned long int time;
00357                 Profile_Get_Ticks(&time);
00358                 time-=StartTime;
00359                 TotalTime += (float)time / Profile_Get_Tick_Rate();
00360         }
00361         return ( RecursionCounter == 0 );
00362 }
00363 
00364 
00365 /***************************************************************************************************
00366 **
00367 ** CProfileIterator
00368 **
00369 ***************************************************************************************************/
00370 CProfileIterator::CProfileIterator( CProfileNode * start )
00371 {
00372         CurrentParent = start;
00373         CurrentChild = CurrentParent->Get_Child();
00374 }
00375 
00376 
00377 void    CProfileIterator::First(void)
00378 {
00379         CurrentChild = CurrentParent->Get_Child();
00380 }
00381 
00382 
00383 void    CProfileIterator::Next(void)
00384 {
00385         CurrentChild = CurrentChild->Get_Sibling();
00386 }
00387 
00388 
00389 bool    CProfileIterator::Is_Done(void)
00390 {
00391         return CurrentChild == NULL;
00392 }
00393 
00394 
00395 void    CProfileIterator::Enter_Child( int index )
00396 {
00397         CurrentChild = CurrentParent->Get_Child();
00398         while ( (CurrentChild != NULL) && (index != 0) ) {
00399                 index--;
00400                 CurrentChild = CurrentChild->Get_Sibling();
00401         }
00402 
00403         if ( CurrentChild != NULL ) {
00404                 CurrentParent = CurrentChild;
00405                 CurrentChild = CurrentParent->Get_Child();
00406         }
00407 }
00408 
00409 
00410 void    CProfileIterator::Enter_Parent( void )
00411 {
00412         if ( CurrentParent->Get_Parent() != NULL ) {
00413                 CurrentParent = CurrentParent->Get_Parent();
00414         }
00415         CurrentChild = CurrentParent->Get_Child();
00416 }
00417 
00418 
00419 /***************************************************************************************************
00420 **
00421 ** CProfileManager
00422 **
00423 ***************************************************************************************************/
00424 
00425 CProfileNode    CProfileManager::Root( "Root", NULL );
00426 CProfileNode *  CProfileManager::CurrentNode = &CProfileManager::Root;
00427 int                             CProfileManager::FrameCounter = 0;
00428 unsigned long int                       CProfileManager::ResetTime = 0;
00429 
00430 
00431 /***********************************************************************************************
00432  * CProfileManager::Start_Profile -- Begin a named profile                                    *
00433  *                                                                                             *
00434  * Steps one level deeper into the tree, if a child already exists with the specified name     *
00435  * then it accumulates the profiling; otherwise a new child node is added to the profile tree. *
00436  *                                                                                             *
00437  * INPUT:                                                                                      *
00438  * name - name of this profiling record                                                        *
00439  *                                                                                             *
00440  * WARNINGS:                                                                                   *
00441  * The string used is assumed to be a static string; pointer compares are used throughout      *
00442  * the profiling code for efficiency.                                                          *
00443  *=============================================================================================*/
00444 void    CProfileManager::Start_Profile( const char * name )
00445 {
00446         if (name != CurrentNode->Get_Name()) {
00447                 CurrentNode = CurrentNode->Get_Sub_Node( name );
00448         } 
00449         
00450         CurrentNode->Call();
00451 }
00452 
00453 
00454 /***********************************************************************************************
00455  * CProfileManager::Stop_Profile -- Stop timing and record the results.                       *
00456  *=============================================================================================*/
00457 void    CProfileManager::Stop_Profile( void )
00458 {
00459         // Return will indicate whether we should back up to our parent (we may
00460         // be profiling a recursive function)
00461         if (CurrentNode->Return()) {
00462                 CurrentNode = CurrentNode->Get_Parent();
00463         }
00464 }
00465 
00466 
00467 /***********************************************************************************************
00468  * CProfileManager::Reset -- Reset the contents of the profiling system                       *
00469  *                                                                                             *
00470  *    This resets everything except for the tree structure.  All of the timing data is reset.  *
00471  *=============================================================================================*/
00472 void    CProfileManager::Reset( void )
00473 { 
00474         gProfileClock.reset();
00475         Root.Reset();
00476     Root.Call();
00477         FrameCounter = 0;
00478         Profile_Get_Ticks(&ResetTime);
00479 }
00480 
00481 
00482 /***********************************************************************************************
00483  * CProfileManager::Increment_Frame_Counter -- Increment the frame counter                    *
00484  *=============================================================================================*/
00485 void CProfileManager::Increment_Frame_Counter( void )
00486 {
00487         FrameCounter++;
00488 }
00489 
00490 
00491 /***********************************************************************************************
00492  * CProfileManager::Get_Time_Since_Reset -- returns the elapsed time since last reset         *
00493  *=============================================================================================*/
00494 float CProfileManager::Get_Time_Since_Reset( void )
00495 {
00496         unsigned long int time;
00497         Profile_Get_Ticks(&time);
00498         time -= ResetTime;
00499         return (float)time / Profile_Get_Tick_Rate();
00500 }
00501 
00502 #include <stdio.h>
00503 
00504 void    CProfileManager::dumpRecursive(CProfileIterator* profileIterator, int spacing)
00505 {
00506         profileIterator->First();
00507         if (profileIterator->Is_Done())
00508                 return;
00509 
00510         float accumulated_time=0,parent_time = profileIterator->Is_Root() ? CProfileManager::Get_Time_Since_Reset() : profileIterator->Get_Current_Parent_Total_Time();
00511         int i;
00512         int frames_since_reset = CProfileManager::Get_Frame_Count_Since_Reset();
00513         for (i=0;i<spacing;i++) printf(".");
00514         printf("----------------------------------\n");
00515         for (i=0;i<spacing;i++) printf(".");
00516         printf("Profiling: %s (total running time: %.3f ms) ---\n",     profileIterator->Get_Current_Parent_Name(), parent_time );
00517         float totalTime = 0.f;
00518 
00519         
00520         int numChildren = 0;
00521         
00522         for (i = 0; !profileIterator->Is_Done(); i++,profileIterator->Next())
00523         {
00524                 numChildren++;
00525                 float current_total_time = profileIterator->Get_Current_Total_Time();
00526                 accumulated_time += current_total_time;
00527                 float fraction = parent_time > SIMD_EPSILON ? (current_total_time / parent_time) * 100 : 0.f;
00528                 {
00529                         int i;  for (i=0;i<spacing;i++) printf(".");
00530                 }
00531                 printf("%d -- %s (%.2f %%) :: %.3f ms / frame (%d calls)\n",i, profileIterator->Get_Current_Name(), fraction,(current_total_time / (double)frames_since_reset),profileIterator->Get_Current_Total_Calls());
00532                 totalTime += current_total_time;
00533                 //recurse into children
00534         }
00535 
00536         if (parent_time < accumulated_time)
00537         {
00538                 printf("what's wrong\n");
00539         }
00540         for (i=0;i<spacing;i++) printf(".");
00541         printf("%s (%.3f %%) :: %.3f ms\n", "Unaccounted:",parent_time > SIMD_EPSILON ? ((parent_time - accumulated_time) / parent_time) * 100 : 0.f, parent_time - accumulated_time);
00542         
00543         for (i=0;i<numChildren;i++)
00544         {
00545                 profileIterator->Enter_Child(i);
00546                 dumpRecursive(profileIterator,spacing+3);
00547                 profileIterator->Enter_Parent();
00548         }
00549 }
00550 
00551 
00552 
00553 void    CProfileManager::dumpAll()
00554 {
00555         CProfileIterator* profileIterator = 0;
00556         profileIterator = CProfileManager::Get_Iterator();
00557 
00558         dumpRecursive(profileIterator,0);
00559 
00560         CProfileManager::Release_Iterator(profileIterator);
00561 }
00562 
00563 
00564 
00565 
00566 #endif //BT_NO_PROFILE