R. Clayton (rclayton@clayton.cs.monmouth.edu)
(no date)
[George Huber contributed these timings for thread create-delete times on
windows (I'm guessing nt).]
Thread and Process creation times on Windows
System: 200 MHz intel processor, 64 MBytes RAM
Note: The timer class uses an intel instruction (RDTSC - Read Time Stamp
Counter) which returns the number of clock cycles passed since booting the CPU
in a 64 bit integer. This enables us to get 5 ns precision in timing on a 200
MHz machine. Unfortunately, this op-code is not accessible through inline
assembly, thus we need to enter the machine code directly to access this value.
Thread creation times were measured in three modes, Debug, Release (default
optimization) and Release (Max speed optimization). The program created 2000
threads and waited for each thread to exit prior to creating the next thread.
Two overheads were of interest: the overhead of the timer (which was built
into the timer class) and the overhead of an empty loop. The average times
for ten runs of the program, with the standard deviations (in parentheses), are
given in Table 1.
Table 1 - thread creation time, nanoseconds
timer loop create
Debug: 1860 (250) 25 (1) 865260 (38930)
Release, speed: 70 (0.3) 0 (0) 891300 (13760)
Release, default: 300 (102) 30 (.5) 874065 (20750)
Process creation times were likewise measured in the same three modes. Unlike
UNIX systems, which have 'fork' and 'exec' system calls, Windows has a single
call that combines these two functions. In addition, the CreateProcess system
call takes a number of parameters specifying how the new process should be
created and started. In this test the defaults were used for all of the
parameters except the creation flag, where the value DETACHED_PROCESS was used.
This prevented the new process from creating a console window. Since
CreateProcess spawns a new process, a second executable which performed no
actions ('proc.exe') was created for this test. The average times for ten runs
of the program, with the standard deviations (in parentheses), are given in
Table 2.
Table 2 - process creation time, nanoseconds
timer loop create
Debug: 1325 (180) 30 (0.5) 1376200 (247870)
Release, speed: 70 (0.4) 0 (0) 1162920 (152320)
Release, default: 455 (170) 30 (1.8) 1062245 ( 89255)
As expected, it is clear that it is cheaper to create threads than it is to
create processes by about sixty percent. It takes about 876865 ns (8.7E-4 sec)
to create a thread while it takes about 1200455 ns (1.2E-3 sec) to create a
process.
Source code:
// timer.h -- declaration and definition of timer class
#pragma once
#pragma warning(disable : 4035)
// Returns the current value of the processor's cycle counter as a 64-bit
// unsigned integer. According to the accompanying write-up, the two bytes
// emitted below are the machine code for the Intel RDTSC (Read Time Stamp
// Counter) instruction, entered by hand because the op-code was not available
// through the inline assembler.
inline unsigned __int64 GetCycleCount(void)
{
// Hand-assembled instruction: opcode bytes 0x0F, 0x31.
_asm _emit 0x0F
_asm _emit 0x31
// There is deliberately no return statement. NOTE(review): this presumably
// relies on the instruction leaving its result in the registers the compiler
// uses for a 64-bit return value (EDX:EAX on 32-bit x86) -- confirm for the
// target compiler. Warning 4035 is disabled by the #pragma preceding this
// function, which keeps the missing return from being reported.
}
class CTimer
{
unsigned __int64 m_startcycle;
public:
unsigned __int64 m_overhead;
CTimer(void)
{
m_overhead = 0;
Start();
m_overhead = Stop();
}
void Start(void)
{
m_startcycle = GetCycleCount();
}
unsigned __int64 Stop(void)
{
return GetCycleCount() - m_startcycle-m_overhead;
}
};
// Timing.cpp : Defines the entry point for the console application.
//
#include <windows.h>
#include <stdlib.h>
#include <iostream.h>
#include <process.h>
#include "timer.h"
#define iters 2000
void threadFun(void*);
int main(int argc, char* argv[])
{
int nIdx;
HANDLE tid;
CTimer timer;
// get processor speed and overhead associated with the timer......
timer.Start();
Sleep(1000);
unsigned cpuspeed10 = (unsigned)(timer.Stop()/100000);
cout << "CPU speed " << cpuspeed10/ 10 << "." << cpuspeed10 % 10
<< " mhz" << endl;
cout << "Timer overhead "
<< (unsigned)(timer.m_overhead* 10000 / cpuspeed10) << " ns\n";
// get overhead associated with doing the loop......
timer.Start();
for(nIdx = 0; nIdx < iters; nIdx++);
unsigned loop = (unsigned)(timer.Stop());
unsigned sloop = loop / iters;
cout << "Loop overhead " << sloop * 10000 / cpuspeed10 << " ns\n";
// get time for process creation....
timer.Start();
for(nIdx = 0; nIdx < iters; nIdx++)
{
tid = (HANDLE)_beginthread(threadFun, 0, NULL);
if(-1 == (unsigned long)tid)
{
exit(1);
}
WaitForSingleObject(tid, INFINITE); // wait for thread to end
}
unsigned create = (unsigned)(timer.Stop());
unsigned screate = create / iters;
cout << "Create Overhead " << screate * 10000 / cpuspeed10 << " ns\n";
return 0;
}
// Thread body used by the benchmark. It intentionally does no work, so the
// measured time covers only creating the thread and waiting for it to exit.
void threadFun(void* dummy)
{
    (void)dummy;   // argument is unused
}
===================== process timing source code
// Timing.cpp : Defines the entry point for the console application.
//
#include "stdafx.h"
#include "timer.h"
#include <process.h>
#include <windows.h>
#include <stdlib.h>
#include <iostream.h>
#define iters 2000
void threadFun(void*);
int main(int argc, char* argv[])
{
int nIdx;
CTimer timer;
STARTUPINFO si;
PROCESS_INFORMATION pi;
// get processor speed and overhead associated with the timer......
timer.Start();
Sleep(1000);
unsigned cpuspeed10 = (unsigned)(timer.Stop()/100000);
cout << "CPU speed " << cpuspeed10/ 10 << "." << cpuspeed10 % 10
<< " mhz" << endl;
cout << "Timer overhead "
<< (unsigned)(timer.m_overhead* 10000 / cpuspeed10) << " ns\n";
// get overhead associated with doing the loop......
timer.Start();
for(nIdx = 0; nIdx < iters; nIdx++);
unsigned loop = (unsigned)(timer.Stop());
unsigned sloop = loop / iters;
cout << "Loop overhead " << sloop * 10000 / cpuspeed10 << " ns\n";
// get time for process creation....
timer.Start();
char szPath[] = "E:\\temp\\cs537\\timing\\proc\\release\\proc.exe";
DWORD dwParm = DETACHED_PROCESS; // creation parameters
memset((void*)&si, 0, sizeof(STARTUPINFO)); // initialize startup info
si.cb = sizeof(STARTUPINFO);
for(nIdx = 0; nIdx < iters; nIdx++)
{
if(!CreateProcess(szPath, 0, 0, 0, FALSE, dwParm, 0, 0, &si, &pi))
{
DWORD dwErr = GetLastError();
exit(0);
}
CloseHandle(pi.hThread);
WaitForSingleObject(pi.hProcess, INFINITE);
}
unsigned create = (unsigned)(timer.Stop());
unsigned screate = create / iters;
cout << "Create Overhead " << screate * 10000 / cpuspeed10 << " ns\n";
return 0;
}
// NOTE(review): this second main() follows the process-timing program with no
// separator. It is presumably the source of the do-nothing child executable
// ('proc.exe') that the process-timing program launches, and would belong in
// its own source file -- confirm.
int main(int argc, char* argv[])
{
// Performs no work and reports success.
return 0;
}
This archive was generated by hypermail 2.0b3 on Sun May 06 2001 - 20:30:05 EDT