1.Remotery簡介
Remotery是一個輕量級的遠端實時CPU/GPU分析器,主要用於監控CPU和GPU上多執行緒的活動。它提供了一個C檔案,可以很容易的整合到專案中,並配置一個實時監控的Web介面,可以透過遠端觀察和分析程式的效能,適用於監控遊戲的實時執行效能和分析移動端應用的效能場景。
2.Remotery編譯執行
Remotery可以定義一些額外的宏來修改要編譯到Remotery中的功能:
Macro Default Description
RMT_ENABLED 1 Disable this to not include any bits of Remotery in your build
RMT_USE_TINYCRT 0 Used by the Celtoys TinyCRT library (not released yet)
RMT_USE_CUDA 0 Assuming CUDA headers/libs are setup, allow CUDA profiling
RMT_USE_D3D11 0 Assuming Direct3D 11 headers/libs are setup, allow D3D11 GPU profiling
RMT_USE_OPENGL 0 Allow OpenGL GPU profiling (dynamically links OpenGL libraries on available platforms)
RMT_USE_METAL 0 Allow Metal profiling of command buffers
2.1 Linux平臺下
Linux(GCC):將lib資料夾中的原始碼加入專案。編譯時需要加上-pthread選項來連結執行緒庫。例如,編譯Remotery庫本身與sample範例:
cc lib/Remotery.c sample/sample.c -I lib -pthread -lm
編譯後,會在當前目錄下生成一個a.out可執行檔案。執行./a.out後,用瀏覽器開啟vis/index.html,即可看到各執行緒的執行情況。
2.2 Windows平臺下
Windows(MSVC):將lib/Remotery.c和lib/Remotery.h新增到專案中,並在include目錄設定中加入Remotery/lib路徑。所需的庫ws2_32.lib會透過Remotery.c中的#pragma comment(lib, "ws2_32.lib")指令自動連結。
3.Remotery各功能類簡介
3.1 基礎功能類
3.1.1 New
帶有錯誤值的新建/刪除運算子,用於簡化物件建立/銷燬
// Ensures the pointer is non-NULL, calls the destructor, frees memory and sets the pointer to NULL.
// The body is wrapped in do/while(0) so that `Delete(type, obj);` expands to exactly one
// statement and cannot swallow a following `else` when used as an unbraced if-body.
#define Delete(type, obj) \
    do \
    { \
        if ((obj) != NULL) \
        { \
            type##_Destructor(obj); \
            rmtFree(obj); \
            (obj) = NULL; \
        } \
    } while (0)

// Opens the allocation scope used by the New_N macros: allocates sizeof(type) bytes into obj and,
// on allocation failure, stores RMT_ERROR_MALLOC_FAIL in the caller's `error` variable.
// An `error` variable must be in scope at the expansion site.
// The macro deliberately leaves two blocks open; EndNew closes them. The final line carries
// no trailing backslash so that the following #define is NOT spliced into this macro.
#define BeginNew(type, obj) \
    { \
        obj = (type*)rmtMalloc(sizeof(type)); \
        if (obj == NULL) \
        { \
            error = RMT_ERROR_MALLOC_FAIL; \
        } \
        else \
        {

// Closes the scope opened by BeginNew: if the constructor reported an error,
// the partially-built object is destroyed and obj is reset to NULL.
#define EndNew(type, obj) \
            if (error != RMT_ERROR_NONE) \
                Delete(type, obj); \
        } \
    }

// Specialisations for New with varying constructor parameter counts.
// Each one allocates, runs type##_Constructor and stores its result in `error`.
#define New_0(type, obj) \
    BeginNew(type, obj); error = type##_Constructor(obj); EndNew(type, obj)
#define New_1(type, obj, a0) \
    BeginNew(type, obj); error = type##_Constructor(obj, a0); EndNew(type, obj)
#define New_2(type, obj, a0, a1) \
    BeginNew(type, obj); error = type##_Constructor(obj, a0, a1); EndNew(type, obj)
#define New_3(type, obj, a0, a1, a2) \
    BeginNew(type, obj); error = type##_Constructor(obj, a0, a1, a2); EndNew(type, obj)
3.1.2 Deps
外部依賴項
rmtU8 minU8(rmtU8 a, rmtU8 b);
rmtU16 maxU16(rmtU16 a, rmtU16 b);
rmtS64 maxS64(rmtS64 a, rmtS64 b);
void* rmtMalloc( rmtU32 size );
void* rmtRealloc( void* ptr, rmtU32 size);
void rmtFree( void* ptr );
3.1.3 ObjAlloc
可重用物件分配器
//
// Link header for free-list-backed objects.
// All objects that require free-list-backed allocation need to inherit from this type.
//
typedef struct ObjectLink_s
{
// Next link in the chain; note that the pointer itself is the volatile-qualified part
struct ObjectLink_s* volatile next;
} ObjectLink;
// State of the reusable-object allocator: creation parameters, usage counters and the free list.
typedef struct
{
// Object create/destroy parameters:
// size of one object plus the callbacks stored for constructing/destroying it
rmtU32 object_size;
ObjConstructor constructor;
ObjDestructor destructor;
// Number of objects in the free list
volatile rmtS32 nb_free;
// Number of objects used by callers
volatile rmtS32 nb_inuse;
// Total allocation count
volatile rmtS32 nb_allocated;
// Head of the free list (presumably chained through ObjectLink::next -- confirm against the implementation)
ObjectLink* first_free;
} ObjectAllocator;
void ObjectLink_Constructor(ObjectLink* link);
rmtError ObjectAllocator_Constructor(ObjectAllocator* allocator, rmtU32 object_size, ObjConstructor constructor, ObjDestructor destructor);
void ObjectAllocator_Destructor(ObjectAllocator* allocator);
void ObjectAllocator_Push(ObjectAllocator* allocator, ObjectLink* start, ObjectLink* end);
ObjectLink* ObjectAllocator_Pop(ObjectAllocator* allocator);
rmtError ObjectAllocator_Alloc(ObjectAllocator* allocator, void** object);
void ObjectAllocator_Free(ObjectAllocator* allocator, void* object);
void ObjectAllocator_FreeRange(ObjectAllocator* allocator, void* start, void* end, rmtU32 count);
3.1.4 Safec
安全C庫摘錄
r_size_t strnlen_s (const char *dest, r_size_t dmax);
errno_t strstr_s (char *dest, r_size_t dmax,
const char *src, r_size_t slen, char **substring);
errno_t strncat_s (char *dest, r_size_t dmax, const char *src, r_size_t slen);
errno_t strcpy_s(char *dest, r_size_t dmax, const char *src);
void itoahex_s( char *dest, r_size_t dmax, rmtS32 value );
3.1.5 SHA1
SHA-1加密雜湊函式
// Value type returned by SHA1_Calculate.
typedef struct
{
// 20 raw bytes (presumably the 160-bit SHA-1 digest -- confirm against SHA1_Calculate)
rmtU8 data[20];
} SHA1;
unsigned int rol(const unsigned int value, const unsigned int steps);
void clearWBuffert(unsigned int* buffert);
void innerHash(unsigned int* result, unsigned int* w);
void calc(const void* src, const int bytelength, unsigned char* hash);
SHA1 SHA1_Calculate(const void* src, unsigned int length);
3.1.6 BASE64
Base-64編碼器
rmtU32 Base64_CalculateEncodedLength(rmtU32 length);
void Base64_Encode(const rmtU8* in_bytes, rmtU32 length, rmtU8* out_bytes);
3.1.7 Murmurhash
Murmur-Hash 3
rmtU32 rotl32(rmtU32 x, rmtS8 r);
rmtU32 getblock32(const rmtU32* p, int i);
rmtU32 fmix32(rmtU32 h);
rmtU32 MurmurHash3_x86_32(const void* key, int len, rmtU32 seed);
3.2 執行緒併發功能類
3.2.1 Tls
執行緒區域性儲存(pthread_key_create/pthread_setspecific)
rmtError tlsAlloc(rmtTLS* handle);
void tlsFree(rmtTLS handle);
void tlsSet(rmtTLS handle, void* value);
void* tlsGet(rmtTLS handle);
3.2.2 Atomic
原子操作
rmtBool AtomicCompareAndSwap(rmtU32 volatile* val, long old_val, long new_val);
rmtBool AtomicCompareAndSwapPointer(long* volatile* ptr, long* old_ptr, long* new_ptr);
rmtS32 AtomicAdd(rmtS32 volatile* value, rmtS32 add);
void AtomicSub(rmtS32 volatile* value, rmtS32 sub);
void CompilerWriteFence();
void CompilerReadFence();
rmtU32 LoadAcquire(rmtU32* volatile address);
long* LoadAcquirePointer(long* volatile* ptr);
void StoreRelease(rmtU32* volatile address, rmtU32 value);
void StoreReleasePointer(long* volatile* ptr, long* value);
3.2.3 Threads
執行緒處理
// Forward typedef so that ThreadProc can refer to the thread object before the struct is defined
typedef struct Thread_t rmtThread;
// Signature of the callback a thread runs; it receives the thread object and returns an rmtError
typedef rmtError(*ThreadProc)(rmtThread* thread);
// Thread object: OS handle, callback with its parameter, and the exit/error state.
struct Thread_t
{
// OS-specific data: a Win32 HANDLE on Windows, a pthread_t everywhere else
#if defined(RMT_PLATFORM_WINDOWS)
HANDLE handle;
#else
pthread_t handle;
#endif
// Callback executed when the thread is created
ThreadProc callback;
// Caller-specified parameter passed to Thread_Create
void* param;
// Error state returned from callback
rmtError error;
// External threads can set this to request an exit
volatile rmtBool request_exit;
};
int rmtThread_Valid(rmtThread* thread);
rmtError rmtThread_Constructor(rmtThread* thread, ThreadProc callback, void* param);
void rmtThread_RequestExit(rmtThread* thread);
void rmtThread_Join(rmtThread* thread);
void rmtThread_Destructor(rmtThread* thread);
3.2.4 DynBuf
動態緩衝器
// Dynamic byte buffer.
// NOTE(review): field meanings below are read off the names only -- confirm against Buffer_Grow/Buffer_Write.
typedef struct
{
// Presumably the step size used when the buffer grows
rmtU32 alloc_granularity;
// Presumably the current capacity of `data` in bytes
rmtU32 bytes_allocated;
// Presumably the number of bytes of `data` written so far
rmtU32 bytes_used;
// Byte storage
rmtU8* data;
} Buffer;
rmtError Buffer_Constructor(Buffer* buffer, rmtU32 alloc_granularity);
void Buffer_Destructor(Buffer* buffer);
rmtError Buffer_Grow(Buffer* buffer, rmtU32 length);
rmtError Buffer_Write(Buffer* buffer, const void* data, rmtU32 length);
rmtError Buffer_WriteStringZ(Buffer* buffer, rmtPStr string);
void U32ToByteArray(rmtU8* dest, rmtU32 value);
rmtError Buffer_WriteU32(Buffer* buffer, rmtU32 value);
rmtBool IsLittleEndian();
rmtError Buffer_WriteU64(Buffer* buffer, rmtU64 value);
rmtError Buffer_WriteStringWithLength(Buffer* buffer, rmtPStr string);
3.3 網路服務功能類
3.3.1 Sockets
TCP/IP Sockets
// Thin wrapper around a single socket handle.
typedef struct
{
// Underlying socket handle (SOCKET is declared outside this excerpt)
SOCKET socket;
} TCPSocket;
// Result type of TCPSocket_PollStatus / WebSocket_PollStatus.
// NOTE(review): flag meanings are read off the names -- confirm against the poll implementation.
typedef struct
{
// Presumably set when the socket has data to read
rmtBool can_read;
// Presumably set when the socket can be written to
rmtBool can_write;
// Error reported for the poll
rmtError error_state;
} SocketStatus;
rmtError TCPSocket_Constructor(TCPSocket* tcp_socket);
void TCPSocket_Destructor(TCPSocket* tcp_socket);
rmtError TCPSocket_RunServer(TCPSocket* tcp_socket, rmtU16 port, rmtBool reuse_open_port, rmtBool limit_connections_to_localhost);
void TCPSocket_Close(TCPSocket* tcp_socket);
SocketStatus TCPSocket_PollStatus(TCPSocket* tcp_socket);
rmtError TCPSocket_AcceptConnection(TCPSocket* tcp_socket, TCPSocket** client_socket);
int TCPSocketWouldBlock();
rmtError TCPSocket_Send(TCPSocket* tcp_socket, const void* data, rmtU32 length, rmtU32 timeout_ms);
rmtError TCPSocket_Receive(TCPSocket* tcp_socket, void* data, rmtU32 length, rmtU32 timeout_ms);
3.3.2 WebSockets
WebSockets
// Mode stored in WebSocket::mode and passed to WebSocket_RunServer.
// NOTE(review): presumably selects text vs. binary frames -- confirm against WebSocket_WriteFrameHeader.
enum WebSocketMode
{
WEBSOCKET_NONE = 0,
WEBSOCKET_TEXT = 1,
WEBSOCKET_BINARY = 2,
};
// WebSocket connection state layered on top of a TCPSocket.
typedef struct
{
// Underlying TCP socket
TCPSocket* tcp_socket;
// Mode this socket was opened with (see enum WebSocketMode)
enum WebSocketMode mode;
// NOTE(review): presumably the unread byte count of the frame currently being received -- confirm
rmtU32 frame_bytes_remaining;
// NOTE(review): presumably the current position within the 4-byte mask -- confirm
rmtU32 mask_offset;
// The same 4 mask bytes, addressable either per byte or as one 32-bit value
union
{
rmtU8 mask[4];
rmtU32 mask_u32;
} data;
} WebSocket;
char* GetField(char* buffer, r_size_t buffer_length, rmtPStr field_name);
rmtError WebSocketHandshake(TCPSocket* tcp_socket, rmtPStr limit_host);
rmtError WebSocket_Constructor(WebSocket* web_socket, TCPSocket* tcp_socket);
void WebSocket_Destructor(WebSocket* web_socket);
rmtError WebSocket_RunServer(WebSocket* web_socket, rmtU16 port, rmtBool reuse_open_port, rmtBool limit_connections_to_localhost, enum WebSocketMode mode);
void WebSocket_Close(WebSocket* web_socket);
SocketStatus WebSocket_PollStatus(WebSocket* web_socket);
rmtError WebSocket_AcceptConnection(WebSocket* web_socket, WebSocket** client_socket);
void WriteSize(rmtU32 size, rmtU8* dest, rmtU32 dest_size, rmtU32 dest_offset);
void WebSocket_PrepareBuffer(Buffer* buffer);
rmtU32 WebSocket_FrameHeaderSize(rmtU32 length);
void WebSocket_WriteFrameHeader(WebSocket* web_socket, rmtU8* dest, rmtU32 length);
rmtError WebSocket_Send(WebSocket* web_socket, const void* data, rmtU32 length, rmtU32 timeout_ms);
rmtError ReceiveFrameHeader(WebSocket* web_socket);
rmtError WebSocket_Receive(WebSocket* web_socket, void* data, rmtU32* msg_len, rmtU32 length, rmtU32 timeout_ms);
3.3.3 Network
網路伺服器
// Callback type stored in Server::receive_handler.
// NOTE(review): arguments are presumably (context, message data, message length),
// mirroring Remotery_ReceiveMessage -- confirm.
typedef rmtError (*Server_ReceiveHandler)(void*, char*, rmtU32);
// Network server state: one listening WebSocket and one client WebSocket.
typedef struct
{
WebSocket* listen_socket;
WebSocket* client_socket;
// NOTE(review): presumably a timestamp of the last ping; units not visible here -- confirm
rmtU32 last_ping_time;
// Settings matching the parameters of Server_Constructor / Server_CreateListenSocket
rmtU16 port;
rmtBool reuse_open_port;
rmtBool limit_connections_to_localhost;
// A dynamically-sized buffer used for binary-encoding messages and sending to the client
Buffer* bin_buf;
// Handler for receiving messages from the client, and the context pointer kept alongside it
Server_ReceiveHandler receive_handler;
void* receive_handler_context;
} Server;
rmtError Server_CreateListenSocket(Server* server, rmtU16 port, rmtBool reuse_open_port, rmtBool limit_connections_to_localhost);
rmtError Server_Constructor(Server* server, rmtU16 port, rmtBool reuse_open_port, rmtBool limit_connections_to_localhost);
void Server_Destructor(Server* server);
rmtBool Server_IsClientConnected(Server* server);
void Server_DisconnectClient(Server* server);
rmtError Server_Send(Server* server, const void* data, rmtU32 length, rmtU32 timeout);
rmtError Server_ReceiveMessage(Server* server, char message_first_byte, rmtU32 message_length);
void Server_Update(Server* server);
3.4 負載率取樣功能類
3.4.1 Timers
特定於平臺的計時器
功能類函式介面:
// Microsecond-accuracy high-performance counter
// On non-Windows platforms LARGE_INTEGER is aliased to a 64-bit unsigned integer
// so that the struct below can be declared identically everywhere.
#ifndef RMT_PLATFORM_WINDOWS
typedef rmtU64 LARGE_INTEGER;
#endif
typedef struct
{
// NOTE(review): presumably the raw counter value captured by usTimer_Init -- confirm
LARGE_INTEGER counter_start;
// NOTE(review): presumably the factor converting raw counter ticks to microseconds -- confirm
double counter_scale;
} usTimer;
void usTimer_Init(usTimer* timer);
rmtU32 msTimer_Get(); // Returns a time value in milliseconds
rmtU64 usTimer_Get(usTimer* timer);
void msSleep(rmtU32 time_ms);
3.4.2 Sample
基本取樣說明(預設情況下為CPU)
// Kind of sample; CPU is the default sample type.
typedef enum SampleType
{
SampleType_CPU,
SampleType_CUDA,
SampleType_D3D11,
SampleType_OpenGL,
SampleType_Metal,
// Number of sample types; used to size per-type arrays such as ThreadSampler::sample_trees
SampleType_Count,
} SampleType;
// One node of a sample tree: identity, tree links and timing data for a sampled region.
typedef struct Sample
{
// Inherit so that samples can be quickly allocated
ObjectLink Link;
// Which kind of sample this is (see SampleType)
enum SampleType type;
// Used to anonymously copy sample data without knowing its type
rmtU32 size_bytes;
// Hash generated from sample name
rmtU32 name_hash;
// Unique, persistent ID among all samples
rmtU32 unique_id;
// Null-terminated string storing the hash-prefixed 6-digit colour
rmtU8 unique_id_html_colour[8];
// Links to related samples in the tree
struct Sample* parent;
struct Sample* first_child;
struct Sample* last_child;
struct Sample* next_sibling;
// Keep track of child count to distinguish from repeated calls to the same function at the same stack level
// This is also mixed with the callstack hash to allow consistent addressing of any point in the tree
rmtU32 nb_children;
// Sample end points and length in microseconds
rmtU64 us_start;
rmtU64 us_end;
rmtU64 us_length;
// Total sampled length of all children
rmtU64 us_sampled_length;
// Number of times this sample was used in a call in aggregate mode, 1 otherwise
rmtU32 call_count;
// Current and maximum sample recursion depths
rmtU16 recurse_depth;
rmtU16 max_recurse_depth;
} Sample;
rmtError Sample_Constructor(Sample* sample);
void Sample_Destructor(Sample* sample);
void Sample_Prepare(Sample* sample, rmtU32 name_hash, Sample* parent);
rmtError bin_Sample(Buffer* buffer, Sample* sample);
rmtError bin_SampleArray(Buffer* buffer, Sample* parent_sample);
3.4.3 SampleTree
帶有分配器的樣本樹
// A tree of samples together with the allocator its samples come from.
typedef struct SampleTree
{
// Allocator for all samples
ObjectAllocator* allocator;
// Root sample for all samples created by this thread
Sample* root;
// Most recently pushed sample
Sample* current_parent;
} SampleTree;
// Sample-tree message record; its fields mirror the arguments of AddSampleTreeMessage.
// NOTE(review): presumably the payload of a MsgID_SampleTree message -- confirm.
typedef struct Msg_SampleTree
{
// Root of the sample tree being handed over
Sample* root_sample;
// Allocator associated with the tree's samples
ObjectAllocator* allocator;
// Name of the thread the tree belongs to
rmtPStr thread_name;
} Msg_SampleTree;
rmtError SampleTree_Constructor(SampleTree* tree, rmtU32 sample_size, ObjConstructor constructor, ObjDestructor destructor);
void SampleTree_Destructor(SampleTree* tree);
rmtU32 HashCombine(rmtU32 hash_a, rmtU32 hash_b);
rmtError SampleTree_Push(SampleTree* tree, rmtU32 name_hash, rmtU32 flags, Sample** sample);
void SampleTree_Pop(SampleTree* tree, Sample* sample);
ObjectLink* FlattenSampleTree(Sample* sample, rmtU32* nb_samples);
void FreeSampleTree(Sample* sample, ObjectAllocator* allocator);
// Declaration only; the body is not part of this excerpt.
// NOTE(review): by its name and parameters this presumably posts a sample-tree message
// for `sample` onto `queue` -- confirm against the definition.
void AddSampleTreeMessage(rmtMessageQueue* queue, Sample* sample, ObjectAllocator* allocator, rmtPStr thread_name, struct ThreadSampler* thread_sampler);
3.4.4 Tsampler
每個執行緒的取樣器
// Per-thread sampler state.
typedef struct ThreadSampler
{
// Name to assign to the thread in the viewer
rmtS8 name[256];
// Store a unique sample tree for each type
SampleTree* sample_trees[SampleType_Count];
// Table of all sample names encountered on this thread
StringTable* names;
// D3D11 state, only present when D3D11 profiling is compiled in
#if RMT_USE_D3D11
D3D11* d3d11;
#endif
// Next in the global list of active thread samplers
struct ThreadSampler* volatile next;
} ThreadSampler;
rmtError ThreadSampler_Constructor(ThreadSampler* thread_sampler);
void ThreadSampler_Destructor(ThreadSampler* ts);
rmtError ThreadSampler_Push(SampleTree* tree, rmtU32 name_hash, rmtU32 flags, Sample** sample);
rmtBool ThreadSampler_Pop(ThreadSampler* ts, rmtMessageQueue* queue, Sample* sample);
rmtU32 ThreadSampler_GetNameHash(ThreadSampler* ts, rmtPStr name, rmtU32* hash_cache);
3.5 訊息佇列類
3.5.1 Vmbuffer
使用虛擬記憶體進行自動換行的映象緩衝區
// Mirrored buffer that uses virtual memory to get automatic wrap-around.
typedef struct VirtualMirrorBuffer
{
// Page-rounded size of the buffer without mirroring
rmtU32 size;
// Pointer to the first part of the mirror
// The second part comes directly after at ptr+size bytes
rmtU8* ptr;
// Windows-only bookkeeping: a page mapping on Xbox One, a file-mapping handle otherwise
#ifdef RMT_PLATFORM_WINDOWS
#ifdef _XBOX_ONE
size_t page_count;
size_t* page_mapping;
#else
HANDLE file_map_handle;
#endif
#endif
} VirtualMirrorBuffer;
rmtError VirtualMirrorBuffer_Constructor(VirtualMirrorBuffer* buffer, rmtU32 size, int nb_attempts);
void VirtualMirrorBuffer_Destructor(VirtualMirrorBuffer* buffer);
3.5.2 HashTable
用於插入/查詢的整數對雜湊對映。為了增加簡單性,沒有刪除。
// One key/value entry of rmtHashTable.
typedef struct
{
// Non-zero, pre-hashed key
rmtU32 key;
// Value that's not equal to RMT_NOT_FOUND
rmtU32 value;
} HashSlot;
// Integer-pair hash map supporting insert and find only; there is no removal, to keep it simple.
typedef struct
{
// Stats
// NOTE(review): presumably capacity (max_nb_slots) and number of occupied slots (nb_slots) -- confirm
rmtU32 max_nb_slots;
rmtU32 nb_slots;
// Data
HashSlot* slots;
} rmtHashTable;
rmtError rmtHashTable_Constructor(rmtHashTable* table, rmtU32 max_nb_slots);
void rmtHashTable_Destructor(rmtHashTable* table);
rmtError rmtHashTable_Insert(rmtHashTable* table, rmtU32 key, rmtU32 value);
rmtError rmtHashTable_Resize(rmtHashTable* table);
rmtU32 rmtHashTable_Find(rmtHashTable* table, rmtU32 key);
3.5.3 StringTable
從字串雜湊對映到本地緩衝區中的字串偏移
// Maps string hashes to the offsets of the strings inside a local text buffer.
typedef struct
{
// Growable dynamic array of strings added so far
Buffer* text;
// Map from text hash to text location in the buffer
rmtHashTable* text_map;
} StringTable;
rmtError StringTable_Constructor(StringTable* table);
void StringTable_Destructor(StringTable* table);
rmtPStr StringTable_Find(StringTable* table, rmtU32 name_hash);
void StringTable_Insert(StringTable* table, rmtU32 name_hash, rmtPStr name);
3.5.4 Messageq
多生產者、單消費者訊息佇列
// Identifier stored in Message::id.
typedef enum MessageID
{
MsgID_NotReady,
MsgID_LogText,
MsgID_SampleTree,
MsgID_None,
// Not a message kind; going by its name and value it exists to force a 32-bit enum representation
MsgID_Force32Bits = 0xFFFFFFFF,
} MessageID;
// Header of one message in the message queue, followed by its payload bytes.
typedef struct Message
{
// Message kind (see MessageID)
MessageID id;
// Size of the payload in bytes
rmtU32 payload_size;
// For telling which thread the message came from in the debugger
struct ThreadSampler* thread_sampler;
// First payload byte.
// NOTE(review): presumably the message is over-allocated so that payload_size bytes
// follow from here -- confirm against rmtMessageQueue_SizeForPayload/AllocMessage
rmtU8 payload[1];
} Message;
// Multi-producer, single-consumer message queue.
typedef struct rmtMessageQueue
{
// Size value for the queue (presumably the byte size of `data` -- confirm)
rmtU32 size;
// The physical address of this data buffer is pointed to by two sequential
// virtual memory pages, allowing automatic wrap-around of any reads or writes
// that exceed the limits of the buffer.
VirtualMirrorBuffer* data;
// Read/write position never wrap allowing trivial overflow checks
// with easier debugging
rmtU32 read_pos;
rmtU32 write_pos;
} rmtMessageQueue;
rmtError rmtMessageQueue_Constructor(rmtMessageQueue* queue, rmtU32 size);
void rmtMessageQueue_Destructor(rmtMessageQueue* queue);
rmtU32 rmtMessageQueue_SizeForPayload(rmtU32 payload_size);
static Message* rmtMessageQueue_AllocMessage(rmtMessageQueue* queue, rmtU32 payload_size, struct ThreadSampler* thread_sampler);
void rmtMessageQueue_CommitMessage(Message* message, MessageID id);
Message* rmtMessageQueue_PeekNextMessage(rmtMessageQueue* queue);
void rmtMessageQueue_ConsumeNextMessage(rmtMessageQueue* queue, Message* message);
3.6 主功能類
3.6.1 Remotery
// Top-level profiler instance: server, timer, per-thread samplers, message queue and worker thread.
struct Remotery
{
// Network server
Server* server;
// Microsecond accuracy timer for CPU timestamps
usTimer timer;
// TLS handle (presumably the slot holding each thread's ThreadSampler -- confirm)
rmtTLS thread_sampler_tls_handle;
// Linked list of all known threads being sampled
ThreadSampler* volatile first_thread_sampler;
// Queue between clients and main remotery thread
rmtMessageQueue* mq_to_rmt_thread;
// The main server thread
rmtThread* thread;
// Set to trigger a map of each message on the remotery thread message queue
void (*map_message_queue_fn)(Remotery* rmt, Message*);
void* map_message_queue_data;
// Optional GPU back-end state, present only when the matching RMT_USE_* macro is enabled
#if RMT_USE_CUDA
rmtCUDABind cuda;
#endif
#if RMT_USE_OPENGL
OpenGL* opengl;
#endif
#if RMT_USE_METAL
Metal* metal;
#endif
};
void GetSampleDigest(Sample* sample, rmtU32* digest_hash, rmtU32* nb_samples);
rmtError Remotery_SendLogTextMessage(Remotery* rmt, Message* message);
rmtError bin_SampleTree(Buffer* buffer, Msg_SampleTree* msg);
rmtError Remotery_SendSampleTreeMessage(Remotery* rmt, Message* message);
rmtError Remotery_ConsumeMessageQueue(Remotery* rmt);
void Remotery_FlushMessageQueue(Remotery* rmt);
void Remotery_MapMessageQueue(Remotery* rmt);
rmtError Remotery_ThreadMain(rmtThread* thread);
rmtError Remotery_ReceiveMessage(void* context, char* message_data, rmtU32 message_length);
rmtError Remotery_Constructor(Remotery* rmt);
void Remotery_Destructor(Remotery* rmt);
// Declaration only; the body is not part of this excerpt.
// Returns an rmtError and hands a ThreadSampler back through the thread_sampler out-parameter
// (presumably the sampler belonging to the calling thread -- confirm against the definition).
rmtError Remotery_GetThreadSampler(Remotery* rmt, ThreadSampler** thread_sampler);
void Remotery_DestroyThreadSamplers(Remotery* rmt);
void* CRTMalloc(void* mm_context, rmtU32 size);
void CRTFree(void* mm_context, void* ptr);
void* CRTRealloc(void* mm_context, void* ptr, rmtU32 size);
以下帶有RMT_API標記的介面在標頭檔案中宣告,當Remotery作為第三方庫使用時,供其他庫或程式呼叫:
RMT_API rmtSettings* _rmt_Settings( void );
RMT_API enum rmtError _rmt_CreateGlobalInstance(Remotery** remotery);
RMT_API void _rmt_DestroyGlobalInstance(Remotery* remotery);
RMT_API void _rmt_SetGlobalInstance(Remotery* remotery);
RMT_API Remotery* _rmt_GetGlobalInstance(void);
RMT_API void _rmt_SetCurrentThreadName(rmtPStr thread_name);
RMT_API void _rmt_LogText(rmtPStr text);
RMT_API void _rmt_BeginCPUSample(rmtPStr name, rmtU32 flags, rmtU32* hash_cache);
RMT_API void _rmt_EndCPUSample(void);
3.6.2 CUDA
CUDA事件負載率取樣
// Sample specialisation for CUDA: a base Sample plus the two CUDA events that bracket it.
typedef struct CUDASample
{
// IS-A inheritance relationship
Sample base;
// Pair of events that wrap the sample
CUevent event_start;
CUevent event_end;
} CUDASample;
rmtError MapCUDAResult(CUresult result);
rmtError CUDASetContext(void* context);
rmtError CUDAGetContext(void** context);
rmtError CUDAEnsureContext();
rmtError CUDAEventCreate(CUevent* phEvent, unsigned int Flags);
rmtError CUDAEventDestroy(CUevent hEvent);
rmtError CUDAEventRecord(CUevent hEvent, void* hStream);
rmtError CUDAEventQuery(CUevent hEvent);
rmtError CUDAEventElapsedTime(float* pMilliseconds, CUevent hStart, CUevent hEnd);
rmtError CUDASample_Constructor(CUDASample* sample);
void CUDASample_Destructor(CUDASample* sample);
rmtBool AreCUDASamplesReady(Sample* sample);
rmtBool GetCUDASampleTimes(Sample* root_sample, Sample* sample);
RMT_API void _rmt_BindCUDA(const rmtCUDABind* bind);
RMT_API void _rmt_BeginCUDASample(rmtPStr name, rmtU32* hash_cache, void* stream);
RMT_API void _rmt_EndCUDASample(void* stream);
3.6.3 D3D11
Direct3D 11事件取樣
3.6.4 OPENGL
OpenGL事件取樣
3.6.5 METAL
metal事件取樣
4.Remotery主體設計
主函式主流程:
rmtError Remotery_ThreadMain(rmtThread* thread)
網路功能類繼承關係:
如何計算的每個函式的CPU負載呢?
初步分析,Remotery是在執行一個執行緒/函式之前,先呼叫_rmt_BeginCPUSample(rmtPStr name, rmtU32 flags, rmtU32* hash_cache)將這個執行緒/函式名(name)加入一個hash表中並記錄當前時間;待該執行緒/函式執行完畢後,再呼叫_rmt_EndCPUSample()取得另一個時間。兩個時間之差就是該函式的執行時間,並以此作為計算負載的依據。
開源地址:
https://gitee.com/stlstl/Remotery.git