perf.c 17 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755
  1. /* Copyright (C) 2002 Free Software Foundation, Inc.
  2. This file is part of the GNU C Library.
  3. Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
  4. The GNU C Library is free software; you can redistribute it and/or
  5. modify it under the terms of the GNU Lesser General Public
  6. License as published by the Free Software Foundation; either
  7. version 2.1 of the License, or (at your option) any later version.
  8. The GNU C Library is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  11. Lesser General Public License for more details.
  12. You should have received a copy of the GNU Lesser General Public
  13. License along with the GNU C Library; if not, write to the Free
  14. Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  15. 02111-1307 USA. */
  16. #define _GNU_SOURCE 1
  17. #include <argp.h>
  18. #include <error.h>
  19. #include <errno.h>
  20. #include <fcntl.h>
  21. #include <inttypes.h>
  22. #include <limits.h>
  23. #include <pthread.h>
  24. #include <signal.h>
  25. #include <stdbool.h>
  26. #include <stdlib.h>
  27. #include <string.h>
  28. #include <time.h>
  29. #include <unistd.h>
  30. #include <sys/param.h>
  31. #include <sys/types.h>
  /* Largest value accepted for the thread-count options.  Can be
     overridden on the compiler command line.  */
  #if !defined MAX_THREADS
  # define MAX_THREADS 100000
  #endif

  /* Number of concurrently running threads when -t is not given.  */
  #if !defined DEFAULT_THREADS
  # define DEFAULT_THREADS 50
  #endif

  /* Keys for the long-only options.  They start at 300 so they cannot
     collide with the character codes used as keys for short options.  */
  enum
  {
    OPT_TO_THREAD = 300,
    OPT_TO_PROCESS = 301,
    OPT_SYNC_SIGNAL = 302,
    OPT_SYNC_JOIN = 303,
    OPT_TOPLEVEL = 304
  };
  43. static const struct argp_option options[] =
  44. {
  45. { NULL, 0, NULL, 0, "\
  46. This is a test for threads so we allow ther user to selection the number of \
  47. threads which are used at any one time. Independently the total number of \
  48. rounds can be selected. This is the total number of threads which will have \
  49. run when the process terminates:" },
  50. { "threads", 't', "NUMBER", 0, "Number of threads used at once" },
  51. { "starts", 's', "NUMBER", 0, "Total number of working threads" },
  52. { "toplevel", OPT_TOPLEVEL, "NUMBER", 0,
  53. "Number of toplevel threads which start the other threads; this \
  54. implies --sync-join" },
  55. { NULL, 0, NULL, 0, "\
  56. Each thread can do one of two things: sleep or do work. The latter is 100% \
  57. CPU bound. The work load is the probability a thread does work. All values \
  58. from zero to 100 (inclusive) are valid. How often each thread repeats this \
  59. can be determined by the number of rounds. The work cost determines how long \
  60. each work session (not sleeping) takes. If it is zero a thread would \
  61. effectively nothing. By setting the number of rounds to zero the thread \
  62. does no work at all and pure thread creation times can be measured." },
  63. { "workload", 'w', "PERCENT", 0, "Percentage of time spent working" },
  64. { "workcost", 'c', "NUMBER", 0,
  65. "Factor in the cost of each round of working" },
  66. { "rounds", 'r', "NUMBER", 0, "Number of rounds each thread runs" },
  67. { NULL, 0, NULL, 0, "\
  68. There are a number of different methods how thread creation can be \
  69. synchronized. Synchronization is necessary since the number of concurrently \
  70. running threads is limited." },
  71. { "sync-signal", OPT_SYNC_SIGNAL, NULL, 0,
  72. "Synchronize using a signal (default)" },
  73. { "sync-join", OPT_SYNC_JOIN, NULL, 0, "Synchronize using pthread_join" },
  74. { NULL, 0, NULL, 0, "\
  75. One parameter for each threads execution is the size of the stack. If this \
  76. parameter is not used the system's default stack size is used. If many \
  77. threads are used the stack size should be chosen quite small." },
  78. { "stacksize", 'S', "BYTES", 0, "Size of threads stack" },
  79. { "guardsize", 'g', "BYTES", 0,
  80. "Size of stack guard area; must fit into the stack" },
  81. { NULL, 0, NULL, 0, "Signal options:" },
  82. { "to-thread", OPT_TO_THREAD, NULL, 0, "Send signal to main thread" },
  83. { "to-process", OPT_TO_PROCESS, NULL, 0,
  84. "Send signal to process (default)" },
  85. { NULL, 0, NULL, 0, "Administrative options:" },
  86. { "progress", 'p', NULL, 0, "Show signs of progress" },
  87. { "timing", 'T', NULL, 0,
  88. "Measure time from startup to the last thread finishing" },
  89. { NULL, 0, NULL, 0, NULL }
  90. };
  91. /* Prototype for option handler. */
  92. static error_t parse_opt (int key, char *arg, struct argp_state *state);
  93. /* Data structure to communicate with argp functions. */
  94. static struct argp argp =
  95. {
  96. options, parse_opt
  97. };
  98. static unsigned long int threads = DEFAULT_THREADS;
  99. static unsigned long int workload = 75;
  100. static unsigned long int workcost = 20;
  101. static unsigned long int rounds = 10;
  102. static long int starts = 5000;
  103. static unsigned long int stacksize;
  104. static long int guardsize = -1;
  105. static bool progress;
  106. static bool timing;
  107. static bool to_thread;
  108. static unsigned long int toplevel = 1;
  109. static long int running;
  110. static pthread_mutex_t running_mutex = PTHREAD_MUTEX_INITIALIZER;
  111. static pid_t pid;
  112. static pthread_t tmain;
  113. static clockid_t cl;
  114. static struct timespec start_time;
  115. static pthread_mutex_t sum_mutex = PTHREAD_MUTEX_INITIALIZER;
  116. unsigned int sum;
  117. static enum
  118. {
  119. sync_signal,
  120. sync_join
  121. }
  122. sync_method;
  123. /* We use 64bit values for the times. */
  124. typedef unsigned long long int hp_timing_t;
  125. /* Attributes for all created threads. */
  126. static pthread_attr_t attr;
  127. static void *
  128. work (void *arg)
  129. {
  130. unsigned long int i;
  131. unsigned int state = (unsigned long int) arg;
  132. for (i = 0; i < rounds; ++i)
  133. {
  134. /* Determine what to do. */
  135. unsigned int rnum;
  136. /* Uniform distribution. */
  137. do
  138. rnum = rand_r (&state);
  139. while (rnum >= UINT_MAX - (UINT_MAX % 100));
  140. rnum %= 100;
  141. if (rnum < workload)
  142. {
  143. int j;
  144. int a[4] = { i, rnum, i + rnum, rnum - i };
  145. if (progress)
  146. write (STDERR_FILENO, "c", 1);
  147. for (j = 0; j < workcost; ++j)
  148. {
  149. a[0] += a[3] >> 12;
  150. a[1] += a[2] >> 20;
  151. a[2] += a[1] ^ 0x3423423;
  152. a[3] += a[0] - a[1];
  153. }
  154. pthread_mutex_lock (&sum_mutex);
  155. sum += a[0] + a[1] + a[2] + a[3];
  156. pthread_mutex_unlock (&sum_mutex);
  157. }
  158. else
  159. {
  160. /* Just sleep. */
  161. struct timespec tv;
  162. tv.tv_sec = 0;
  163. tv.tv_nsec = 10000000;
  164. if (progress)
  165. write (STDERR_FILENO, "w", 1);
  166. nanosleep (&tv, NULL);
  167. }
  168. }
  169. return NULL;
  170. }
  171. static void *
  172. thread_function (void *arg)
  173. {
  174. work (arg);
  175. pthread_mutex_lock (&running_mutex);
  176. if (--running <= 0 && starts <= 0)
  177. {
  178. /* We are done. */
  179. if (progress)
  180. write (STDERR_FILENO, "\n", 1);
  181. if (timing)
  182. {
  183. struct timespec end_time;
  184. if (clock_gettime (cl, &end_time) == 0)
  185. {
  186. end_time.tv_sec -= start_time.tv_sec;
  187. end_time.tv_nsec -= start_time.tv_nsec;
  188. if (end_time.tv_nsec < 0)
  189. {
  190. end_time.tv_nsec += 1000000000;
  191. --end_time.tv_sec;
  192. }
  193. printf ("\nRuntime: %lu.%09lu seconds\n",
  194. (unsigned long int) end_time.tv_sec,
  195. (unsigned long int) end_time.tv_nsec);
  196. }
  197. }
  198. printf ("Result: %08x\n", sum);
  199. exit (0);
  200. }
  201. pthread_mutex_unlock (&running_mutex);
  202. if (sync_method == sync_signal)
  203. {
  204. if (to_thread)
  205. /* This code sends a signal to the main thread. */
  206. pthread_kill (tmain, SIGUSR1);
  207. else
  208. /* Use this code to test sending a signal to the process. */
  209. kill (pid, SIGUSR1);
  210. }
  211. if (progress)
  212. write (STDERR_FILENO, "f", 1);
  213. return NULL;
  214. }
  215. struct start_info
  216. {
  217. unsigned int starts;
  218. unsigned int threads;
  219. };
  220. static void *
  221. start_threads (void *arg)
  222. {
  223. struct start_info *si = arg;
  224. unsigned int starts = si->starts;
  225. pthread_t ths[si->threads];
  226. unsigned int state = starts;
  227. unsigned int n;
  228. unsigned int i = 0;
  229. int err;
  230. if (progress)
  231. write (STDERR_FILENO, "T", 1);
  232. memset (ths, '\0', sizeof (pthread_t) * si->threads);
  233. while (starts-- > 0)
  234. {
  235. if (ths[i] != 0)
  236. {
  237. /* Wait for the threads in the order they were created. */
  238. err = pthread_join (ths[i], NULL);
  239. if (err != 0)
  240. error (EXIT_FAILURE, err, "cannot join thread");
  241. if (progress)
  242. write (STDERR_FILENO, "f", 1);
  243. }
  244. err = pthread_create (&ths[i], &attr, work,
  245. (void *) (long) (rand_r (&state) + starts + i));
  246. if (err != 0)
  247. error (EXIT_FAILURE, err, "cannot start thread");
  248. if (progress)
  249. write (STDERR_FILENO, "t", 1);
  250. if (++i == si->threads)
  251. i = 0;
  252. }
  253. n = i;
  254. do
  255. {
  256. if (ths[i] != 0)
  257. {
  258. err = pthread_join (ths[i], NULL);
  259. if (err != 0)
  260. error (EXIT_FAILURE, err, "cannot join thread");
  261. if (progress)
  262. write (STDERR_FILENO, "f", 1);
  263. }
  264. if (++i == si->threads)
  265. i = 0;
  266. }
  267. while (i != n);
  268. if (progress)
  269. write (STDERR_FILENO, "F", 1);
  270. return NULL;
  271. }
  272. int
  273. main (int argc, char *argv[])
  274. {
  275. int remaining;
  276. sigset_t ss;
  277. pthread_t th;
  278. pthread_t *ths = NULL;
  279. int empty = 0;
  280. int last;
  281. bool cont = true;
  282. /* Parse and process arguments. */
  283. argp_parse (&argp, argc, argv, 0, &remaining, NULL);
  284. if (sync_method == sync_join)
  285. {
  286. ths = (pthread_t *) calloc (threads, sizeof (pthread_t));
  287. if (ths == NULL)
  288. error (EXIT_FAILURE, errno,
  289. "cannot allocate memory for thread descriptor array");
  290. last = threads;
  291. }
  292. else
  293. {
  294. ths = &th;
  295. last = 1;
  296. }
  297. if (toplevel > threads)
  298. {
  299. printf ("resetting number of toplevel threads to %lu to not surpass number to concurrent threads\n",
  300. threads);
  301. toplevel = threads;
  302. }
  303. if (timing)
  304. {
  305. if (clock_getcpuclockid (0, &cl) != 0
  306. || clock_gettime (cl, &start_time) != 0)
  307. timing = false;
  308. }
  309. /* We need this later. */
  310. pid = getpid ();
  311. tmain = pthread_self ();
  312. /* We use signal SIGUSR1 for communication between the threads and
  313. the main thread. We only want sychronous notification. */
  314. if (sync_method == sync_signal)
  315. {
  316. sigemptyset (&ss);
  317. sigaddset (&ss, SIGUSR1);
  318. if (sigprocmask (SIG_BLOCK, &ss, NULL) != 0)
  319. error (EXIT_FAILURE, errno, "cannot set signal mask");
  320. }
  321. /* Create the thread attributes. */
  322. pthread_attr_init (&attr);
  323. /* If the user provided a stack size use it. */
  324. if (stacksize != 0
  325. && pthread_attr_setstacksize (&attr, stacksize) != 0)
  326. puts ("could not set stack size; will use default");
  327. /* And stack guard size. */
  328. if (guardsize != -1
  329. && pthread_attr_setguardsize (&attr, guardsize) != 0)
  330. puts ("invalid stack guard size; will use default");
  331. /* All threads are created detached if we are not using pthread_join
  332. to synchronize. */
  333. if (sync_method != sync_join)
  334. pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
  335. if (sync_method == sync_signal)
  336. {
  337. while (1)
  338. {
  339. int err;
  340. bool do_wait = false;
  341. pthread_mutex_lock (&running_mutex);
  342. if (starts-- < 0)
  343. cont = false;
  344. else
  345. do_wait = ++running >= threads && starts > 0;
  346. pthread_mutex_unlock (&running_mutex);
  347. if (! cont)
  348. break;
  349. if (progress)
  350. write (STDERR_FILENO, "t", 1);
  351. err = pthread_create (&ths[empty], &attr, thread_function,
  352. (void *) starts);
  353. if (err != 0)
  354. error (EXIT_FAILURE, err, "cannot start thread %lu", starts);
  355. if (++empty == last)
  356. empty = 0;
  357. if (do_wait)
  358. sigwaitinfo (&ss, NULL);
  359. }
  360. /* Do nothing anymore. On of the threads will terminate the program. */
  361. sigfillset (&ss);
  362. sigdelset (&ss, SIGINT);
  363. while (1)
  364. sigsuspend (&ss);
  365. }
  366. else
  367. {
  368. pthread_t ths[toplevel];
  369. struct start_info si[toplevel];
  370. unsigned int i;
  371. for (i = 0; i < toplevel; ++i)
  372. {
  373. unsigned int child_starts = starts / (toplevel - i);
  374. unsigned int child_threads = threads / (toplevel - i);
  375. int err;
  376. si[i].starts = child_starts;
  377. si[i].threads = child_threads;
  378. err = pthread_create (&ths[i], &attr, start_threads, &si[i]);
  379. if (err != 0)
  380. error (EXIT_FAILURE, err, "cannot start thread");
  381. starts -= child_starts;
  382. threads -= child_threads;
  383. }
  384. for (i = 0; i < toplevel; ++i)
  385. {
  386. int err = pthread_join (ths[i], NULL);
  387. if (err != 0)
  388. error (EXIT_FAILURE, err, "cannot join thread");
  389. }
  390. /* We are done. */
  391. if (progress)
  392. write (STDERR_FILENO, "\n", 1);
  393. if (timing)
  394. {
  395. struct timespec end_time;
  396. if (clock_gettime (cl, &end_time) == 0)
  397. {
  398. end_time.tv_sec -= start_time.tv_sec;
  399. end_time.tv_nsec -= start_time.tv_nsec;
  400. if (end_time.tv_nsec < 0)
  401. {
  402. end_time.tv_nsec += 1000000000;
  403. --end_time.tv_sec;
  404. }
  405. printf ("\nRuntime: %lu.%09lu seconds\n",
  406. (unsigned long int) end_time.tv_sec,
  407. (unsigned long int) end_time.tv_nsec);
  408. }
  409. }
  410. printf ("Result: %08x\n", sum);
  411. exit (0);
  412. }
  413. /* NOTREACHED */
  414. return 0;
  415. }
  416. /* Handle program arguments. */
  417. static error_t
  418. parse_opt (int key, char *arg, struct argp_state *state)
  419. {
  420. unsigned long int num;
  421. long int snum;
  422. switch (key)
  423. {
  424. case 't':
  425. num = strtoul (arg, NULL, 0);
  426. if (num <= MAX_THREADS)
  427. threads = num;
  428. else
  429. printf ("\
  430. number of threads limited to %u; recompile with a higher limit if necessary",
  431. MAX_THREADS);
  432. break;
  433. case 'w':
  434. num = strtoul (arg, NULL, 0);
  435. if (num <= 100)
  436. workload = num;
  437. else
  438. puts ("workload must be between 0 and 100 percent");
  439. break;
  440. case 'c':
  441. workcost = strtoul (arg, NULL, 0);
  442. break;
  443. case 'r':
  444. rounds = strtoul (arg, NULL, 0);
  445. break;
  446. case 's':
  447. starts = strtoul (arg, NULL, 0);
  448. break;
  449. case 'S':
  450. num = strtoul (arg, NULL, 0);
  451. if (num >= PTHREAD_STACK_MIN)
  452. stacksize = num;
  453. else
  454. printf ("minimum stack size is %d\n", PTHREAD_STACK_MIN);
  455. break;
  456. case 'g':
  457. snum = strtol (arg, NULL, 0);
  458. if (snum < 0)
  459. printf ("invalid guard size %s\n", arg);
  460. else
  461. guardsize = snum;
  462. break;
  463. case 'p':
  464. progress = true;
  465. break;
  466. case 'T':
  467. timing = true;
  468. break;
  469. case OPT_TO_THREAD:
  470. to_thread = true;
  471. break;
  472. case OPT_TO_PROCESS:
  473. to_thread = false;
  474. break;
  475. case OPT_SYNC_SIGNAL:
  476. sync_method = sync_signal;
  477. break;
  478. case OPT_SYNC_JOIN:
  479. sync_method = sync_join;
  480. break;
  481. case OPT_TOPLEVEL:
  482. num = strtoul (arg, NULL, 0);
  483. if (num < MAX_THREADS)
  484. toplevel = num;
  485. else
  486. printf ("\
  487. number of threads limited to %u; recompile with a higher limit if necessary",
  488. MAX_THREADS);
  489. sync_method = sync_join;
  490. break;
  491. default:
  492. return ARGP_ERR_UNKNOWN;
  493. }
  494. return 0;
  495. }
  496. static hp_timing_t
  497. get_clockfreq (void)
  498. {
  499. /* We read the information from the /proc filesystem. It contains at
  500. least one line like
  501. cpu MHz : 497.840237
  502. or also
  503. cpu MHz : 497.841
  504. We search for this line and convert the number in an integer. */
  505. static hp_timing_t result;
  506. int fd;
  507. /* If this function was called before, we know the result. */
  508. if (result != 0)
  509. return result;
  510. fd = open ("/proc/cpuinfo", O_RDONLY);
  511. if (__builtin_expect (fd != -1, 1))
  512. {
  513. /* XXX AFAIK the /proc filesystem can generate "files" only up
  514. to a size of 4096 bytes. */
  515. char buf[4096];
  516. ssize_t n;
  517. n = read (fd, buf, sizeof buf);
  518. if (__builtin_expect (n, 1) > 0)
  519. {
  520. char *mhz = memmem (buf, n, "cpu MHz", 7);
  521. if (__builtin_expect (mhz != NULL, 1))
  522. {
  523. char *endp = buf + n;
  524. int seen_decpoint = 0;
  525. int ndigits = 0;
  526. /* Search for the beginning of the string. */
  527. while (mhz < endp && (*mhz < '0' || *mhz > '9') && *mhz != '\n')
  528. ++mhz;
  529. while (mhz < endp && *mhz != '\n')
  530. {
  531. if (*mhz >= '0' && *mhz <= '9')
  532. {
  533. result *= 10;
  534. result += *mhz - '0';
  535. if (seen_decpoint)
  536. ++ndigits;
  537. }
  538. else if (*mhz == '.')
  539. seen_decpoint = 1;
  540. ++mhz;
  541. }
  542. /* Compensate for missing digits at the end. */
  543. while (ndigits++ < 6)
  544. result *= 10;
  545. }
  546. }
  547. close (fd);
  548. }
  549. return result;
  550. }
  /* Store in *CLOCK_ID the identifier of the CPU-time clock of process PID.

     Only the calling process can be queried, named either by 0 or by its
     own process ID; any other PID yields EPERM.  Returns 0 on success and
     ENOENT when no process CPU-time clock is defined at build time.  */
  int
  clock_getcpuclockid (pid_t pid, clockid_t *clock_id)
  {
    /* We don't allow any process ID but our own.  */
    if (! (pid == 0 || pid == getpid ()))
      return EPERM;

  #ifndef CLOCK_PROCESS_CPUTIME_ID
    /* We don't have a timer for that.  */
    return ENOENT;
  #else
    /* Store the number.  */
    *clock_id = CLOCK_PROCESS_CPUTIME_ID;
    return 0;
  #endif
  }
  /* HP_TIMING_NOW (Var) stores the current value of the processor's cycle
     counter in the 64-bit lvalue Var.  It expands to a single statement.

     `i386' is only predefined in GNU dialects, `__i386__' also with a
     strict -std= setting, so both are tested.  On x86-64 the "A"
     constraint does not name the edx:eax pair, therefore the two halves
     rdtsc delivers are read separately and combined.  */
  #if defined __i386__ || defined i386
  # define HP_TIMING_NOW(Var) __asm__ __volatile__ ("rdtsc" : "=A" (Var))
  #elif defined __x86_64__
  # define HP_TIMING_NOW(Var) \
    do                                                                          \
      {                                                                         \
        unsigned int hp_timing_lo_;                                             \
        unsigned int hp_timing_hi_;                                             \
        __asm__ __volatile__ ("rdtsc"                                           \
                              : "=a" (hp_timing_lo_), "=d" (hp_timing_hi_));   \
        (Var) = ((unsigned long long int) hp_timing_hi_ << 32) | hp_timing_lo_; \
      }                                                                         \
    while (0)
  #elif defined __ia64__
  # define HP_TIMING_NOW(Var) __asm__ __volatile__ ("mov %0=ar.itc" : "=r" (Var) : : "memory")
  #else
  # error "HP_TIMING_NOW missing"
  #endif
  /* Get current value of CLOCK and store it in TP.

     Only CLOCK_PROCESS_CPUTIME_ID is supported.  Its value is the
     processor's cycle counter divided by the frequency reported by
     get_clockfreq().  Returns 0 on success.  For any other clock, or when
     the frequency cannot be determined, returns -1 with errno set to
     EINVAL.  */
  int
  clock_gettime (clockid_t clock_id, struct timespec *tp)
  {
    int retval = -1;

    switch (clock_id)
      {
  #ifdef CLOCK_PROCESS_CPUTIME_ID
      case CLOCK_PROCESS_CPUTIME_ID:
        {
          /* Frequency in Hz, looked up on first use.  */
          static hp_timing_t freq;
          hp_timing_t tsc;

          /* Get the current counter.  */
          HP_TIMING_NOW (tsc);

          if (freq == 0)
            {
              freq = get_clockfreq ();
              if (freq == 0)
                {
                  /* Without the frequency the counter cannot be converted;
                     fail the same way as for an unknown clock.  */
                  errno = EINVAL;
                  break;
                }
            }

          /* Compute the seconds.  */
          tp->tv_sec = tsc / freq;

          /* And the nanoseconds.  This computation should be stable until
             we get machines with about 16GHz frequency.  */
          tp->tv_nsec = ((tsc % freq) * UINT64_C (1000000000)) / freq;

          retval = 0;
        }
        break;
  #endif

      default:
        errno = EINVAL;
        break;
      }

    return retval;
  }