1 /** 2 * Contains support code for code profiling. 3 * 4 * Copyright: Copyright Digital Mars 1995 - 2017. 5 * License: Distributed under the 6 * $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost Software License 1.0). 7 * (See accompanying file LICENSE) 8 * Authors: Walter Bright, Sean Kelly, the LDC team 9 * Source: $(DRUNTIMESRC rt/_trace.d) 10 */ 11 12 module rt.trace; 13 14 import core.demangle; 15 import core.stdc.ctype; 16 import core.stdc.stdio; 17 import core.stdc.stdlib; 18 import core.stdc.string; 19 20 version (CRuntime_Microsoft) 21 private alias core.stdc.stdlib._strtoui64 strtoull; 22 23 shared static this () 24 { 25 enum DefaultLog = "trace.log"; 26 enum DefaultDef = "trace.def"; 27 28 trace_logfilename = strdup(DefaultLog.ptr)[0 .. DefaultLog.length + 1]; 29 trace_deffilename = strdup(DefaultDef.ptr)[0 .. DefaultDef.length + 1]; 30 } 31 32 /** 33 * Set the file path for profile reports (`-profile`) 34 * 35 * This function is a public API, exposed in `core.runtime`. 36 * 37 * Since we are calling C functions under the hood, 38 * and we might need to open and close files during the 39 * runtime tear-down we copy the parameter via malloc 40 * to ensure NUL-termination. 41 * 42 * Params: 43 * name = Path to the output file. Empty means stdout. 44 */ 45 extern(C) void trace_setlogfilename(string name) 46 { 47 updateFileName(trace_logfilename, name); 48 } 49 50 /** 51 * Set the file path for the optimized profile linker DEF file (`-profile`) 52 * 53 * This function is a public API, exposed in `core.runtime`. 54 * 55 * Since we are calling C functions under the hood, 56 * and we might need to open and close files during the 57 * runtime tear-down we copy the parameter via malloc 58 * to ensure NUL-termination. 59 * 60 * Params: 61 * name = Path to the output file. Empty means stdout. 62 */ 63 extern(C) void trace_setdeffilename(string name) 64 { 65 updateFileName(trace_deffilename, name); 66 } 67 68 private: 69 70 // Code shared by both `trace_setXXXfilename` 71 void updateFileName(ref char[] filename, string name) 72 { 73 if (!name.length) 74 { 75 free(filename.ptr); 76 filename = null; 77 } 78 else if (auto newPtr = cast(char*)realloc(filename.ptr, name.length + 1)) 79 { 80 filename = newPtr[0 .. name.length + 1]; 81 filename[0 .. $ - 1] = name[]; 82 filename[$ - 1] = 0; 83 } 84 else 85 assert(0, "Memory allocation failed"); 86 } 87 88 alias long timer_t; 89 90 ///////////////////////////////////// 91 // 92 93 struct SymPair 94 { 95 SymPair* next; 96 Symbol* sym; // function that is called 97 ulong count; // number of times sym is called 98 } 99 100 ///////////////////////////////////// 101 // A Symbol for each function name. 102 103 struct Symbol 104 { 105 Symbol* Sl, Sr; // left, right children 106 SymPair* Sfanin; // list of calling functions 107 SymPair* Sfanout; // list of called functions 108 timer_t totaltime; // aggregate time 109 timer_t functime; // time excluding subfunction calls 110 ubyte Sflags; // SFxxxx 111 uint recursion; // call recursion level 112 const(char)[] Sident; // name of symbol 113 } 114 115 enum ubyte SFvisited = 1; // visited 116 117 118 ////////////////////////////////// 119 // Build a linked list of these. 120 121 struct Stack 122 { 123 Stack* prev; 124 Symbol* sym; 125 timer_t starttime; // time when function was entered 126 timer_t ohd; // overhead of all the bookkeeping code 127 timer_t subtime; // time used by all subfunctions 128 } 129 130 Symbol* root; // root of symbol table 131 bool trace_inited; 132 133 Stack* stack_freelist; 134 Stack* trace_tos; // top of stack 135 136 __gshared 137 { 138 Symbol* groot; // merged symbol table 139 int gtrace_inited; // !=0 if initialized 140 141 timer_t trace_ohd; 142 143 // Those strings include the `\0` in their slice as they're used with fopen 144 char[] trace_logfilename; 145 char[] trace_deffilename; 146 } 147 148 //////////////////////////////////////// 149 // Output optimal function link order. 150 151 private void trace_order(FILE* fpdef, Symbol *s) 152 { 153 while (s) 154 { 155 trace_place(fpdef, s, 0); 156 if (s.Sl) 157 trace_order(fpdef, s.Sl); 158 s = s.Sr; 159 } 160 } 161 162 ////////////////////////////////////////////// 163 // 164 165 private Stack* stack_push() 166 { 167 Stack *s; 168 169 if (stack_freelist) 170 { 171 s = stack_freelist; 172 stack_freelist = s.prev; 173 } 174 else 175 { 176 s = cast(Stack *)trace_malloc(Stack.sizeof); 177 } 178 s.prev = trace_tos; 179 trace_tos = s; 180 return s; 181 } 182 183 ////////////////////////////////////////////// 184 // 185 186 private void stack_free(Stack *s) 187 { 188 s.prev = stack_freelist; 189 stack_freelist = s; 190 } 191 192 ////////////////////////////////////// 193 // Qsort() comparison routine for array of pointers to SymPair's. 194 195 extern(C) int sympair_cmp(scope const void* e1, scope const void* e2) nothrow @nogc 196 { 197 auto count1 = (*cast(SymPair**)e1).count; 198 auto count2 = (*cast(SymPair**)e2).count; 199 return (count1 > count2) - (count1 < count2); 200 } 201 202 ////////////////////////////////////// 203 // Place symbol s, and then place any fan ins or fan outs with 204 // counts greater than count. 205 206 private void trace_place(FILE* fpdef, Symbol* s, ulong count) 207 { 208 if (!(s.Sflags & SFvisited)) 209 { 210 //printf("\t%.*s\t%llu\n", s.Sident.length, s.Sident.ptr, count); 211 fprintf(fpdef,"\t%.*s\n", cast(int) s.Sident.length, s.Sident.ptr); 212 s.Sflags |= SFvisited; 213 214 // Compute number of items in array 215 size_t num = 0; 216 for (auto sp = s.Sfanin; sp; sp = sp.next) 217 num++; 218 for (auto sp = s.Sfanout; sp; sp = sp.next) 219 num++; 220 if (!num) 221 return; 222 223 // Allocate and fill array 224 auto base = cast(SymPair**)trace_malloc(SymPair.sizeof * num); 225 size_t u = 0; 226 for (auto sp = s.Sfanin; sp; sp = sp.next) 227 base[u++] = sp; 228 for (auto sp = s.Sfanout; sp; sp = sp.next) 229 base[u++] = sp; 230 assert(u == num); 231 232 // Sort array 233 qsort(base, num, (SymPair *).sizeof, &sympair_cmp); 234 235 //for (u = 0; u < num; u++) 236 //printf("\t\t%.*s\t%llu\n", base[u].sym.Sident.length, base[u].sym.Sident.ptr, base[u].count); 237 238 // Place symbols 239 for (u = 0; u < num; u++) 240 { 241 if (base[u].count >= count) 242 { 243 auto u2 = (u + 1 < num) ? u + 1 : u; 244 auto c2 = base[u2].count; 245 if (c2 < count) 246 c2 = count; 247 trace_place(fpdef, base[u].sym,c2); 248 } 249 else 250 break; 251 } 252 253 // Clean up 254 trace_free(base); 255 } 256 } 257 258 /////////////////////////////////// 259 // Report results. 260 // Also compute and return number of symbols. 261 262 private size_t trace_report(FILE* fplog, Symbol* s) 263 { 264 //printf("trace_report()\n"); 265 size_t nsymbols; 266 while (s) 267 { 268 ++nsymbols; 269 if (s.Sl) 270 nsymbols += trace_report(fplog, s.Sl); 271 fprintf(fplog,"------------------\n"); 272 ulong count = 0; 273 for (auto sp = s.Sfanin; sp; sp = sp.next) 274 { 275 fprintf(fplog,"\t%5llu\t%.*s\n", sp.count, cast(int) sp.sym.Sident.length, sp.sym.Sident.ptr); 276 count += sp.count; 277 } 278 fprintf(fplog,"%.*s\t%llu\t%lld\t%lld\n", cast(int) s.Sident.length, s.Sident.ptr, count, s.totaltime, s.functime); 279 for (auto sp = s.Sfanout; sp; sp = sp.next) 280 { 281 fprintf(fplog,"\t%5llu\t%.*s\n", sp.count, cast(int) sp.sym.Sident.length, sp.sym.Sident.ptr); 282 } 283 s = s.Sr; 284 } 285 return nsymbols; 286 } 287 288 //////////////////////////////////// 289 // Allocate and fill array of symbols. 290 291 private void trace_array(Symbol*[] psymbols, Symbol *s, ref uint u) 292 { 293 while (s) 294 { 295 psymbols[u++] = s; 296 trace_array(psymbols, s.Sl, u); 297 s = s.Sr; 298 } 299 } 300 301 302 ////////////////////////////////////// 303 // Qsort() comparison routine for array of pointers to Symbol's. 304 305 extern(C) int symbol_cmp(scope const void* e1, scope const void* e2) nothrow @nogc 306 { 307 auto ps1 = cast(Symbol **)e1; 308 auto ps2 = cast(Symbol **)e2; 309 310 auto diff = (*ps2).functime - (*ps1).functime; 311 return (diff == 0) ? 0 : ((diff > 0) ? 1 : -1); 312 } 313 314 315 /////////////////////////////////// 316 // Report function timings 317 318 private void trace_times(FILE* fplog, Symbol*[] psymbols) 319 { 320 // Sort array 321 qsort(psymbols.ptr, psymbols.length, (Symbol *).sizeof, &symbol_cmp); 322 323 // Print array header 324 timer_t time_scale; 325 static if (is(typeof(&QueryPerformanceFrequency))) 326 { 327 timer_t freq; 328 QueryPerformanceFrequency(&freq); 329 time_scale = freq / 1_000_000; 330 fprintf(fplog,"\n======== Timer Is %lld Ticks/Sec, Times are in Microsecs ========\n\n",freq); 331 } 332 else 333 { 334 // The exact frequency is unknown (and may vary), so do the reporting in Mega Ticks, 335 // which corresponds to 1 microsecond on a 1GHz clock. 336 time_scale = 1_000_000; 337 fprintf(fplog,"\n======== Timer frequency unknown, Times are in Megaticks ========\n\n"); 338 } 339 fprintf(fplog," Num Tree Func Per\n"); 340 fprintf(fplog," Calls Time Time Call\n\n"); 341 342 // Print array 343 foreach (s; psymbols) 344 { 345 timer_t tl,tr; 346 timer_t fl,fr; 347 timer_t pl,pr; 348 char[8192] buf = void; 349 SymPair* sp; 350 ulong calls; 351 char[] id; 352 353 calls = 0; 354 id = demangle(s.Sident, buf); 355 for (sp = s.Sfanin; sp; sp = sp.next) 356 calls += sp.count; 357 if (calls == 0) 358 calls = 1; 359 360 tl = s.totaltime / time_scale; 361 fl = s.functime / time_scale; 362 pl = s.functime / calls / time_scale; 363 364 fprintf(fplog,"%7llu%12lld%12lld%12lld %.*s\n", 365 calls, tl, fl, pl, cast(int) id.length, id.ptr); 366 } 367 } 368 369 370 /////////////////////////////////// 371 // Initialize. 372 373 private void trace_init() 374 { 375 synchronized // protects gtrace_inited 376 { 377 if (!gtrace_inited) 378 { 379 gtrace_inited = 1; 380 381 { // See if we can determine the overhead. 382 timer_t starttime; 383 timer_t endtime; 384 385 auto st = trace_tos; 386 trace_tos = null; 387 QueryPerformanceCounter(&starttime); 388 uint u; 389 for (u = 0; u < 100; u++) 390 { 391 _c_trace_pro(0,null); 392 _c_trace_epi(); 393 } 394 QueryPerformanceCounter(&endtime); 395 trace_ohd = (endtime - starttime) / u; 396 //printf("trace_ohd = %lld\n",trace_ohd); 397 if (trace_ohd > 0) 398 trace_ohd--; // round down 399 trace_tos = st; 400 } 401 } 402 } 403 } 404 405 ///////////////////////////////// 406 // Terminate. 407 408 static ~this() 409 { 410 // Free remainder of the thread local stack 411 while (trace_tos) 412 { 413 auto n = trace_tos.prev; 414 stack_free(trace_tos); 415 trace_tos = n; 416 } 417 418 // And free the thread local stack's memory 419 while (stack_freelist) 420 { 421 auto n = stack_freelist.prev; 422 stack_free(stack_freelist); 423 stack_freelist = n; 424 } 425 426 synchronized // protects groot 427 { 428 // Merge thread local root into global groot 429 430 if (!groot) 431 { 432 groot = root; // that was easy 433 root = null; 434 } 435 else 436 { 437 void mergeSymbol(Symbol** proot, const(Symbol)* s) 438 { 439 while (s) 440 { 441 auto gs = trace_addsym(proot, s.Sident); 442 gs.totaltime += s.totaltime; 443 gs.functime += s.functime; 444 445 static void mergeFan(Symbol** proot, SymPair** pgf, const(SymPair)* sf) 446 { 447 for (; sf; sf = sf.next) 448 { 449 auto sym = trace_addsym(proot, sf.sym.Sident); 450 for (auto gf = *pgf; 1; gf = gf.next) 451 { 452 if (!gf) 453 { 454 auto sp = cast(SymPair *)trace_malloc(SymPair.sizeof); 455 sp.next = *pgf; 456 *pgf = sp; 457 sp.sym = sym; 458 sp.count = sf.count; 459 break; 460 } 461 if (gf.sym == sym) 462 { 463 gf.count += sf.count; 464 break; 465 } 466 } 467 } 468 } 469 470 mergeFan(proot, &gs.Sfanin, s.Sfanin); 471 mergeFan(proot, &gs.Sfanout, s.Sfanout); 472 473 mergeSymbol(proot, s.Sl); 474 s = s.Sr; 475 } 476 } 477 478 mergeSymbol(&groot, root); 479 } 480 } 481 482 // Free the memory for the thread local symbol table (root) 483 static void freeSymbol(Symbol* s) 484 { 485 while (s) 486 { 487 freeSymbol(s.Sl); 488 auto next = s.Sr; 489 490 static void freeSymPair(SymPair* sp) 491 { 492 while (sp) 493 { 494 auto spnext = sp.next; 495 trace_free(sp); 496 sp = spnext; 497 } 498 } 499 500 freeSymPair(s.Sfanin); 501 freeSymPair(s.Sfanout); 502 trace_free(s); 503 s = next; 504 } 505 } 506 507 freeSymbol(root); 508 root = null; 509 } 510 511 shared static ~this() 512 { 513 //printf("shared static ~this() groot = %p\n", groot); 514 if (gtrace_inited == 1) 515 { 516 gtrace_inited = 2; 517 518 // Merge in data from any existing file 519 trace_merge(&groot); 520 521 // Report results 522 FILE* fplog = trace_logfilename.length == 0 ? stdout : 523 fopen(trace_logfilename.ptr, "w"); 524 if (fplog) 525 { 526 auto nsymbols = trace_report(fplog, groot); 527 528 auto p = cast(Symbol **)trace_malloc((Symbol *).sizeof * nsymbols); 529 auto psymbols = p[0 .. nsymbols]; 530 531 uint u; 532 trace_array(psymbols, groot, u); 533 trace_times(fplog, psymbols); 534 fclose(fplog); 535 536 trace_free(psymbols.ptr); 537 psymbols = null; 538 } 539 else 540 fprintf(stderr, "cannot write '%s'", trace_logfilename.ptr); 541 542 // Output function link order 543 FILE* fpdef = trace_deffilename.length == 0 ? stdout : 544 fopen(trace_deffilename.ptr, "w"); 545 if (fpdef) 546 { 547 fprintf(fpdef,"\nFUNCTIONS\n"); 548 trace_order(fpdef, groot); 549 fclose(fpdef); 550 } 551 else 552 fprintf(stderr, "cannot write '%s'", trace_deffilename.ptr); 553 } 554 } 555 556 ///////////////////////////////// 557 // Our storage allocator. 558 559 private void *trace_malloc(size_t nbytes) 560 { 561 auto p = malloc(nbytes); 562 if (!p) 563 exit(EXIT_FAILURE); 564 return p; 565 } 566 567 private void trace_free(void *p) 568 { 569 free(p); 570 } 571 572 ////////////////////////////////////////////// 573 // 574 575 private Symbol* trace_addsym(Symbol** proot, const(char)[] id) 576 { 577 //printf("trace_addsym('%s',%d)\n",p,len); 578 auto parent = proot; 579 auto rover = *parent; 580 while (rover !is null) // while we haven't run out of tree 581 { 582 immutable len = id.length <= rover.Sident.length ? id.length : rover.Sident.length; 583 int cmp = memcmp(id.ptr, rover.Sident.ptr, len); 584 if (!cmp) 585 cmp = id.length < rover.Sident.length ? -1 : (id.length > rover.Sident.length); 586 if (cmp == 0) 587 { 588 return rover; 589 } 590 parent = (cmp < 0) ? /* if we go down left side */ 591 &(rover.Sl) : /* then get left child */ 592 &(rover.Sr); /* else get right child */ 593 rover = *parent; /* get child */ 594 } 595 /* not in table, so insert into table */ 596 auto s = cast(Symbol *)trace_malloc(Symbol.sizeof); 597 memset(s,0,Symbol.sizeof); 598 s.Sident = id; 599 *parent = s; // link new symbol into tree 600 return s; 601 } 602 603 /*********************************** 604 * Add symbol s with count to SymPair list. 605 */ 606 607 private void trace_sympair_add(SymPair** psp, Symbol* s, ulong count) 608 { 609 SymPair* sp; 610 611 for (; 1; psp = &sp.next) 612 { 613 sp = *psp; 614 if (!sp) 615 { 616 sp = cast(SymPair *)trace_malloc(SymPair.sizeof); 617 sp.sym = s; 618 sp.count = 0; 619 sp.next = null; 620 *psp = sp; 621 break; 622 } 623 else if (sp.sym == s) 624 { 625 break; 626 } 627 } 628 sp.count += count; 629 } 630 631 ////////////////////////////////////////////// 632 // This one is called by DMD 633 634 private extern(C) void trace_pro(char[] id) 635 { 636 //printf("trace_pro(ptr = %p, length = %lld)\n", id.ptr, id.length); 637 //printf("trace_pro(id = '%.*s')\n", id.length, id.ptr); 638 639 if (!trace_inited) 640 { 641 trace_inited = true; 642 trace_init(); // initialize package 643 } 644 645 timer_t starttime; 646 QueryPerformanceCounter(&starttime); 647 if (id.length == 0) 648 return; 649 auto tos = stack_push(); 650 auto s = trace_addsym(&root, id); 651 tos.sym = s; 652 if (tos.prev) 653 { 654 // Accumulate Sfanout and Sfanin 655 auto prev = tos.prev.sym; 656 trace_sympair_add(&prev.Sfanout,s,1); 657 trace_sympair_add(&s.Sfanin,prev,1); 658 } 659 timer_t t; 660 QueryPerformanceCounter(&t); 661 tos.starttime = starttime; 662 tos.ohd = trace_ohd + t - starttime; 663 tos.subtime = 0; 664 ++s.recursion; 665 //printf("tos.ohd=%lld, trace_ohd=%lld + t=%lld - starttime=%lld\n", 666 // tos.ohd,trace_ohd,t,starttime); 667 } 668 669 // Called by some old versions of DMD 670 extern(C) void _c_trace_pro(size_t idlen, char* idptr) 671 { 672 char[] id = idptr[0 .. idlen]; 673 trace_pro(id); 674 } 675 676 ///////////////////////////////////////// 677 // Called by DMD generated code 678 679 extern(C) void _c_trace_epi() 680 { 681 //printf("_c_trace_epi()\n"); 682 auto tos = trace_tos; 683 if (tos) 684 { 685 timer_t endtime; 686 QueryPerformanceCounter(&endtime); 687 auto starttime = tos.starttime; 688 auto totaltime = endtime - starttime - tos.ohd; 689 if (totaltime < 0) 690 { //printf("endtime=%lld - starttime=%lld - tos.ohd=%lld < 0\n", 691 // endtime,starttime,tos.ohd); 692 totaltime = 0; // round off error, just make it 0 693 } 694 695 // totaltime is time spent in this function + all time spent in 696 // subfunctions - bookkeeping overhead. 697 --tos.sym.recursion; 698 if (tos.sym.recursion == 0) 699 tos.sym.totaltime += totaltime; 700 701 //if (totaltime < tos.subtime) 702 //printf("totaltime=%lld < tos.subtime=%lld\n",totaltime,tos.subtime); 703 tos.sym.functime += totaltime - tos.subtime; 704 auto ohd = tos.ohd; 705 auto n = tos.prev; 706 stack_free(tos); 707 trace_tos = n; 708 if (n) 709 { 710 timer_t t; 711 QueryPerformanceCounter(&t); 712 n.ohd += ohd + t - endtime; 713 n.subtime += totaltime; 714 //printf("n.ohd = %lld\n",n.ohd); 715 } 716 } 717 } 718 719 720 ////////////////////////// FILE INTERFACE ///////////////////////// 721 722 ///////////////////////////////////// 723 // Read line from file fp. 724 // Returns: 725 // trace_malloc'd line buffer 726 // null if end of file 727 728 char[] trace_readline(FILE* fp) 729 { 730 char[] buf; 731 // Last character used in `buf` 732 size_t idx; 733 // Used to break out of the do .. while 734 int currentChar = EOF; 735 736 //printf("trace_readline(%p)\n", fp); 737 do 738 { 739 if (buf.length <= idx) 740 { 741 const size_t newLength = buf.length + 80; 742 if (auto newPtr = cast(char*)realloc(buf.ptr, newLength)) 743 buf = newPtr[0 .. newLength]; 744 else 745 assert(0, "Memory allocation failed"); 746 } 747 currentChar = fgetc(fp); 748 buf[idx++] = cast(char)currentChar; 749 } while (currentChar != EOF && currentChar != '\n'); 750 751 // Encountered '\n' or EOF immediately 752 // The calling code makes a distinction between EOF and '\n' 753 if (idx == 1 && currentChar == EOF) 754 { 755 trace_free(buf.ptr); 756 return null; 757 } 758 buf[idx - 1] = 0; 759 return buf[0 .. idx]; 760 } 761 762 ////////////////////////////////////// 763 // Skip space 764 765 private char *skipspace(char *p) 766 { 767 while (isspace(*p)) 768 p++; 769 return p; 770 } 771 772 //////////////////////////////////////////////////////// 773 // Merge in profiling data from existing file. 774 775 private void trace_merge(Symbol** proot) 776 { 777 // We're outputting to stdout 778 if (!trace_logfilename.length) 779 return; 780 781 FILE* fp = fopen(trace_logfilename.ptr, "r"); 782 if (fp is null) 783 return; 784 scope(exit) fclose(fp); 785 786 char* buf = null; 787 SymPair* sfanin = null; 788 auto psp = &sfanin; 789 char *p; 790 ulong count; 791 Symbol *s; 792 793 while (1) 794 { 795 trace_free(buf); 796 buf = trace_readline(fp).ptr; 797 if (!buf) 798 break; 799 switch (*buf) 800 { 801 case '=': // ignore rest of file 802 trace_free(buf); 803 return; 804 case ' ': 805 case '\t': // fan in or fan out line 806 count = strtoul(buf,&p,10); 807 if (p == buf) // if invalid conversion 808 continue; 809 p = skipspace(p); 810 if (!*p) 811 continue; 812 s = trace_addsym(proot, p[0 .. strlen(p)]); 813 trace_sympair_add(psp,s,count); 814 break; 815 default: 816 if (!isalpha(*buf)) 817 { 818 if (!sfanin) 819 psp = &sfanin; 820 continue; // regard unrecognized line as separator 821 } 822 goto case; 823 case '?': 824 case '_': 825 case '$': 826 case '@': 827 p = buf; 828 while (isgraph(*p)) 829 p++; 830 *p = 0; 831 //printf("trace_addsym('%s')\n",buf); 832 s = trace_addsym(proot, buf[0 .. strlen(buf)]); 833 if (s.Sfanin) 834 { 835 SymPair *sp; 836 837 for (; sfanin; sfanin = sp) 838 { 839 trace_sympair_add(&s.Sfanin,sfanin.sym,sfanin.count); 840 sp = sfanin.next; 841 trace_free(sfanin); 842 } 843 } 844 else 845 s.Sfanin = sfanin; 846 847 sfanin = null; 848 psp = &s.Sfanout; 849 850 { 851 p++; 852 count = strtoul(p,&p,10); 853 timer_t t = cast(long)strtoull(p,&p,10); 854 s.totaltime += t; 855 t = cast(long)strtoull(p,&p,10); 856 s.functime += t; 857 } 858 break; 859 } 860 } 861 } 862 863 ////////////////////////// COMPILER INTERFACE ///////////////////// 864 865 version (Windows) 866 { 867 extern (Windows) 868 { 869 export int QueryPerformanceCounter(timer_t *); 870 export int QueryPerformanceFrequency(timer_t *); 871 } 872 } 873 else version (AArch64) 874 { 875 // We cannot use ldc.intrinsics.llvm_readcyclecounter because that is not an accurate 876 // time counter (it is a counter of CPU cycles, where here we want a time clock). 877 // Also, priviledged execution rights are needed to enable correct counting with 878 // ldc.intrinsics.llvm_readcyclecounter on AArch64. 879 extern (D) void QueryPerformanceCounter(timer_t* ctr) 880 { 881 asm { "mrs %0, cntvct_el0" : "=r" (*ctr); } 882 } 883 extern (D) void QueryPerformanceFrequency(timer_t* freq) 884 { 885 asm { "mrs %0, cntfrq_el0" : "=r" (*freq); } 886 } 887 } 888 else version (LDC) 889 { 890 extern (D) void QueryPerformanceCounter(timer_t* ctr) 891 { 892 import ldc.intrinsics: llvm_readcyclecounter; 893 *ctr = llvm_readcyclecounter(); 894 } 895 } 896 else 897 { 898 extern (D) void QueryPerformanceCounter(timer_t* ctr) 899 { 900 version (D_InlineAsm_X86) 901 { 902 asm 903 { 904 naked ; 905 mov ECX,EAX ; 906 rdtsc ; 907 mov [ECX],EAX ; 908 mov [ECX+4],EDX ; 909 ret ; 910 } 911 } 912 else version (D_InlineAsm_X86_64) 913 { 914 asm 915 { 916 naked ; 917 // rdtsc can produce skewed results without preceding lfence/mfence. 918 // this is what GNU/Linux does, but only use mfence here. 919 // see https://github.com/torvalds/linux/blob/03b9730b769fc4d87e40f6104f4c5b2e43889f19/arch/x86/include/asm/msr.h#L130-L154 920 mfence ; // serialize rdtsc instruction. 921 rdtsc ; 922 mov [RDI],EAX ; 923 mov [RDI+4],EDX ; 924 ret ; 925 } 926 } 927 else 928 { 929 static assert(0); 930 } 931 } 932 }