/**
 * Check the arguments to `printf` and `scanf` against the `format` string.
 *
 * Copyright:   Copyright (C) 1999-2023 by The D Language Foundation, All Rights Reserved
 * Authors:     $(LINK2 https://www.digitalmars.com, Walter Bright)
 * License:     $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
 * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/chkformat.d, _chkformat.d)
 * Documentation:  https://dlang.org/phobos/dmd_chkformat.html
 * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/chkformat.d
 */
module dmd.chkformat;

//import core.stdc.stdio : printf, scanf;
import core.stdc.ctype : isdigit;

import dmd.astenums;
import dmd.cond;
import dmd.errors;
import dmd.expression;
import dmd.globals;
import dmd.identifier;
import dmd.location;
import dmd.mtype;
import dmd.target;


/******************************************
 * Check that arguments to a printf format string are compatible
 * with that string. Issue deprecations for incompatibilities.
 *
 * Follows the C99 specification for printf.
 *
 * Takes a generous, rather than strict, view of compatibility.
 * For example, an unsigned value can be formatted with a signed specifier.
 *
 * Diagnosed incompatibilities are:
 *
 * 1. incompatible sizes which will cause argument misalignment
 * 2. dereferencing arguments that are not pointers
 * 3. insufficient number of arguments
 * 4. struct arguments
 * 5. array and slice arguments
 * 6. non-pointer arguments to `s` specifier
 * 7. non-standard formats
 * 8. undefined behavior per C99
 *
 * Per the C Standard, extra arguments are ignored.
 *
 * No attempt is made to fix the arguments or the format string.
 *
 * Params:
 *      loc = location for error messages
 *      format = format string
 *      args = arguments to match with format string
 *      isVa_list = if a "v" function (format check only)
 *
 * Returns:
 *      `true` if the format string requires more arguments than were
 *      supplied (checking stops at that point); `false` otherwise.
 *      Type mismatches and invalid specifiers are reported as deprecations
 *      but do not change the return value.
 * References:
 * C99 7.19.6.1
 * https://www.cplusplus.com/reference/cstdio/printf/
 */
bool checkPrintfFormat(ref const Loc loc, scope const char[] format, scope Expression[] args, bool isVa_list)
{
    //printf("checkPrintFormat('%.*s')\n", cast(int)format.length, format.ptr);
    size_t n;    // index in args
    for (size_t i = 0; i < format.length;)
    {
        // Skip literal text; only `%` starts a conversion specification.
        if (format[i] != '%')
        {
            ++i;
            continue;
        }
        bool widthStar;
        bool precisionStar;
        size_t j = i;
        const fmt = parsePrintfFormatSpecifier(format, j, widthStar, precisionStar);
        const slice = format[i .. j];   // text of this specifier, for diagnostics
        i = j;

        if (fmt == Format.percent)
            continue;                   // "%%", no arguments
        if (fmt == Format.GNU_m)
            continue;                   // "%m", no arguments

        if (isVa_list)
        {
            // format check only
            if (fmt == Format.error)
                deprecation(loc, "format specifier `\"%.*s\"` is invalid", cast(int)slice.length, slice.ptr);
            continue;
        }

        // Fetch the next unconsumed argument, or report that the format
        // string wants more arguments than were passed and return null.
        Expression getNextArg()
        {
            if (n == args.length)
            {
                deprecation(loc, "more format specifiers than %d arguments", cast(int)n);
                return null;
            }
            return args[n++];
        }

        // Report that `arg` has type `tactual` where `texpect` is required.
        void errorMsg(const char* prefix, Expression arg, const char* texpect, Type tactual)
        {
            deprecation(arg.loc, "%sargument `%s` for format specification `\"%.*s\"` must be `%s`, not `%s`",
                  prefix ? prefix : "", arg.toChars(), cast(int)slice.length, slice.ptr, texpect, tactual.toChars());
        }

        // A `*` field width consumes one `int` argument.
        if (widthStar)
        {
            auto e = getNextArg();
            if (!e)
                return true;
            auto t = e.type.toBasetype();
            if (t.ty != Tint32 && t.ty != Tuns32)
                errorMsg("width ", e, "int", t);
        }

        // A `.*` precision consumes one `int` argument.
        if (precisionStar)
        {
            auto e = getNextArg();
            if (!e)
                return true;
            auto t = e.type.toBasetype();
            if (t.ty != Tint32 && t.ty != Tuns32)
                errorMsg("precision ", e, "int", t);
        }

        // The argument being converted.
        auto e = getNextArg();
        if (!e)
            return true;
        auto t = e.type.toBasetype();
        auto tnext = t.nextOf();        // pointee type; only inspected after `t.ty == Tpointer`
        const c_longsize = target.c.longsize;
        const ptrsize = target.ptrsize;

        // Types which are promoted to int are allowed.
        // Spec: C99 6.5.2.2.7
        final switch (fmt)
        {
            case Format.u:      // unsigned int
            case Format.d:      // int
                if (t.ty != Tint32 && t.ty != Tuns32)
                    errorMsg(null, e, fmt == Format.u ? "uint" : "int", t);
                break;

            case Format.hhu:    // unsigned char
            case Format.hhd:    // signed char
                if (t.ty != Tint32 && t.ty != Tuns32 && t.ty != Tint8 && t.ty != Tuns8)
                    errorMsg(null, e, fmt == Format.hhu ? "ubyte" : "byte", t);
                break;

            case Format.hu:     // unsigned short int
            case Format.hd:     // short int
                if (t.ty != Tint32 && t.ty != Tuns32 && t.ty != Tint16 && t.ty != Tuns16)
                    errorMsg(null, e, fmt == Format.hu ? "ushort" : "short", t);
                break;

            case Format.lu:     // unsigned long int
            case Format.ld:     // long int
                if (!(t.isintegral() && t.size() == c_longsize))
                {
                    if (fmt == Format.lu)
                        errorMsg(null, e, (c_longsize == 4 ? "uint" : "ulong"), t);
                    else
                        errorMsg(null, e, (c_longsize == 4 ? "int" : "long"), t);
                }
                break;

            case Format.llu:    // unsigned long long int
            case Format.lld:    // long long int
                if (t.ty != Tint64 && t.ty != Tuns64)
                    errorMsg(null, e, fmt == Format.llu ? "ulong" : "long", t);
                break;

            case Format.ju:     // uintmax_t
            case Format.jd:     // intmax_t
                if (t.ty != Tint64 && t.ty != Tuns64)
                {
                    if (fmt == Format.ju)
                        errorMsg(null, e, "core.stdc.stdint.uintmax_t", t);
                    else
                        errorMsg(null, e, "core.stdc.stdint.intmax_t", t);
                }
                break;

            case Format.zd:     // size_t
                if (!(t.isintegral() && t.size() == ptrsize))
                    errorMsg(null, e, "size_t", t);
                break;

            case Format.td:     // ptrdiff_t
                if (!(t.isintegral() && t.size() == ptrsize))
                    errorMsg(null, e, "ptrdiff_t", t);
                break;

            case Format.lg:
            case Format.g:      // double
                if (t.ty != Tfloat64 && t.ty != Timaginary64)
                    errorMsg(null, e, "double", t);
                break;

            case Format.Lg:     // long double
                if (t.ty != Tfloat80 && t.ty != Timaginary80)
                    errorMsg(null, e, "real", t);
                break;

            case Format.p:      // pointer
                if (t.ty != Tpointer && t.ty != Tnull && t.ty != Tclass && t.ty != Tdelegate && t.ty != Taarray)
                    errorMsg(null, e, "void*", t);
                break;

            case Format.n:      // pointer to int
                if (!(t.ty == Tpointer && tnext.ty == Tint32))
                    errorMsg(null, e, "int*", t);
                break;

            case Format.ln:     // pointer to long int
                if (!(t.ty == Tpointer && tnext.isintegral() && tnext.size() == c_longsize))
                    errorMsg(null, e, (c_longsize == 4 ? "int*" : "long*"), t);
                break;

            case Format.lln:    // pointer to long long int
                if (!(t.ty == Tpointer && tnext.ty == Tint64))
                    errorMsg(null, e, "long*", t);
                break;

            case Format.hn:     // pointer to short
                if (!(t.ty == Tpointer && tnext.ty == Tint16))
                    errorMsg(null, e, "short*", t);
                break;

            case Format.hhn:    // pointer to signed char
                // `signed char` is D's `byte` (Tint8), matching the "byte*" in the message.
                if (!(t.ty == Tpointer && tnext.ty == Tint8))
                    errorMsg(null, e, "byte*", t);
                break;

            case Format.jn:     // pointer to intmax_t
                if (!(t.ty == Tpointer && tnext.ty == Tint64))
                    errorMsg(null, e, "core.stdc.stdint.intmax_t*", t);
                break;

            case Format.zn:     // pointer to size_t
                if (!(t.ty == Tpointer && tnext.isintegral() && tnext.isunsigned() && tnext.size() == ptrsize))
                    errorMsg(null, e, "size_t*", t);
                break;

            case Format.tn:     // pointer to ptrdiff_t
                if (!(t.ty == Tpointer && tnext.isintegral() && !tnext.isunsigned() && tnext.size() == ptrsize))
                    errorMsg(null, e, "ptrdiff_t*", t);
                break;

            case Format.c:      // char
                if (t.ty != Tint32 && t.ty != Tuns32)
                    errorMsg(null, e, "char", t);
                break;

            case Format.lc:     // wint_t
                if (t.ty != Tint32 && t.ty != Tuns32)
                    errorMsg(null, e, "wchar_t", t);
                break;

            case Format.s:      // pointer to char string
                if (!(t.ty == Tpointer && (tnext.ty == Tchar || tnext.ty == Tint8 || tnext.ty == Tuns8)))
                    errorMsg(null, e, "char*", t);
                break;

            case Format.ls:     // pointer to wchar_t string
                if (!(t.ty == Tpointer && tnext.ty.isSomeChar && tnext.size() == target.c.wchar_tsize))
                    errorMsg(null, e, "wchar_t*", t);
                break;

            case Format.error:
                deprecation(loc, "format specifier `\"%.*s\"` is invalid", cast(int)slice.length, slice.ptr);
                break;

            // `%m` and `%%` were handled above; the POSIX `%m...` forms are
            // only produced by the scanf parser.
            case Format.GNU_m:
            case Format.POSIX_ms:
            case Format.POSIX_mls:
            case Format.percent:
                assert(0);
        }
    }
    return false;
}

/******************************************
 * Check that arguments to a scanf format string are compatible
 * with that string. Issue deprecations for incompatibilities.
 *
 * Follows the C99 specification for scanf.
 *
 * Every scanf argument must be a pointer. Pointee types are matched
 * exactly for most specifiers (e.g. `%d` requires `int*` and `%u`
 * requires `uint*`).
 *
 * Diagnosed incompatibilities are:
 *
 * 1. incompatible sizes which will cause argument misalignment
 * 2. dereferencing arguments that are not pointers
 * 3. insufficient number of arguments
 * 4. struct arguments
 * 5. array and slice arguments
 * 6. non-standard formats
 * 7. undefined behavior per C99
 *
 * Per the C Standard, extra arguments are ignored.
 *
 * No attempt is made to fix the arguments or the format string.
 *
 * Params:
 *      loc = location for error messages
 *      format = format string
 *      args = arguments to match with format string
 *      isVa_list = if a "v" function (format check only)
 *
 * Returns:
 *      `true` if the format string requires more arguments than were
 *      supplied (checking stops at that point); `false` otherwise.
 *      Type mismatches and invalid specifiers are reported as deprecations
 *      but do not change the return value.
 * References:
 * C99 7.19.6.2
 * https://www.cplusplus.com/reference/cstdio/scanf/
 */
bool checkScanfFormat(ref const Loc loc, scope const char[] format, scope Expression[] args, bool isVa_list)
{
    size_t n = 0;   // index in args
    for (size_t i = 0; i < format.length;)
    {
        // Skip literal text; only `%` starts a conversion specification.
        if (format[i] != '%')
        {
            ++i;
            continue;
        }
        bool asterisk;
        size_t j = i;
        const fmt = parseScanfFormatSpecifier(format, j, asterisk);
        const slice = format[i .. j];   // text of this specifier, for diagnostics
        i = j;

        if (fmt == Format.percent || asterisk)
            continue;   // "%%", "%*": no arguments

        if (isVa_list)
        {
            // format check only
            if (fmt == Format.error)
                deprecation(loc, "format specifier `\"%.*s\"` is invalid", cast(int)slice.length, slice.ptr);
            continue;
        }

        // Fetch the next unconsumed argument, or report that the format
        // string wants more arguments than were passed and return null.
        // (Assignment-suppressed `%*` specifiers never reach this point.)
        Expression getNextArg()
        {
            if (n == args.length)
            {
                deprecation(loc, "more format specifiers than %d arguments", cast(int)n);
                return null;
            }
            return args[n++];
        }

        // Report that `arg` has type `tactual` where `texpect` is required.
        void errorMsg(const char* prefix, Expression arg, const char* texpect, Type tactual)
        {
            deprecation(arg.loc, "%sargument `%s` for format specification `\"%.*s\"` must be `%s`, not `%s`",
                  prefix ? prefix : "", arg.toChars(), cast(int)slice.length, slice.ptr, texpect, tactual.toChars());
        }

        auto e = getNextArg();
        if (!e)
            return true;

        auto t = e.type.toBasetype();
        auto tnext = t.nextOf();        // pointee type; only inspected after `t.ty == Tpointer`
        const c_longsize = target.c.longsize;
        const ptrsize = target.ptrsize;

        final switch (fmt)
        {
            case Format.n:
            case Format.d:      // pointer to int
                if (!(t.ty == Tpointer && tnext.ty == Tint32))
                    errorMsg(null, e, "int*", t);
                break;

            case Format.hhn:
            case Format.hhd:    // pointer to signed char
                // `signed char` is D's `byte` (Tint8), matching the "byte*" in the message.
                if (!(t.ty == Tpointer && tnext.ty == Tint8))
                    errorMsg(null, e, "byte*", t);
                break;

            case Format.hn:
            case Format.hd:     // pointer to short
                if (!(t.ty == Tpointer && tnext.ty == Tint16))
                    errorMsg(null, e, "short*", t);
                break;

            case Format.ln:
            case Format.ld:     // pointer to long int
                if (!(t.ty == Tpointer && tnext.isintegral() && !tnext.isunsigned() && tnext.size() == c_longsize))
                    errorMsg(null, e, (c_longsize == 4 ? "int*" : "long*"), t);
                break;

            case Format.lln:
            case Format.lld:    // pointer to long long int
                if (!(t.ty == Tpointer && tnext.ty == Tint64))
                    errorMsg(null, e, "long*", t);
                break;

            case Format.jn:
            case Format.jd:     // pointer to intmax_t
                if (!(t.ty == Tpointer && tnext.ty == Tint64))
                    errorMsg(null, e, "core.stdc.stdint.intmax_t*", t);
                break;

            case Format.zn:
            case Format.zd:     // pointer to size_t
                if (!(t.ty == Tpointer && tnext.isintegral() && tnext.isunsigned() && tnext.size() == ptrsize))
                    errorMsg(null, e, "size_t*", t);
                break;

            case Format.tn:
            case Format.td:     // pointer to ptrdiff_t
                if (!(t.ty == Tpointer && tnext.isintegral() && !tnext.isunsigned() && tnext.size() == ptrsize))
                    errorMsg(null, e, "ptrdiff_t*", t);
                break;

            case Format.u:      // pointer to unsigned int
                if (!(t.ty == Tpointer && tnext.ty == Tuns32))
                    errorMsg(null, e, "uint*", t);
                break;

            case Format.hhu:    // pointer to unsigned char
                if (!(t.ty == Tpointer && tnext.ty == Tuns8))
                    errorMsg(null, e, "ubyte*", t);
                break;

            case Format.hu:     // pointer to unsigned short int
                if (!(t.ty == Tpointer && tnext.ty == Tuns16))
                    errorMsg(null, e, "ushort*", t);
                break;

            case Format.lu:     // pointer to unsigned long int
                if (!(t.ty == Tpointer && tnext.isintegral() && tnext.isunsigned() && tnext.size() == c_longsize))
                    errorMsg(null, e, (c_longsize == 4 ? "uint*" : "ulong*"), t);
                break;

            case Format.llu:    // pointer to unsigned long long int
                if (!(t.ty == Tpointer && tnext.ty == Tuns64))
                    errorMsg(null, e, "ulong*", t);
                break;

            case Format.ju:     // pointer to uintmax_t
                if (!(t.ty == Tpointer && tnext.ty == Tuns64))
                    errorMsg(null, e, "core.stdc.stdint.uintmax_t*", t);
                break;

            case Format.g:      // pointer to float
                if (!(t.ty == Tpointer && tnext.ty == Tfloat32))
                    errorMsg(null, e, "float*", t);
                break;

            case Format.lg:     // pointer to double
                if (!(t.ty == Tpointer && tnext.ty == Tfloat64))
                    errorMsg(null, e, "double*", t);
                break;

            case Format.Lg:     // pointer to long double
                if (!(t.ty == Tpointer && tnext.ty == Tfloat80))
                    errorMsg(null, e, "real*", t);
                break;

            case Format.c:
            case Format.s:      // pointer to char string
                if (!(t.ty == Tpointer && (tnext.ty == Tchar || tnext.ty == Tint8 || tnext.ty == Tuns8)))
                    errorMsg(null, e, "char*", t);
                break;

            case Format.lc:
            case Format.ls:     // pointer to wchar_t string
                if (!(t.ty == Tpointer && tnext.ty.isSomeChar && tnext.size() == target.c.wchar_tsize))
                    errorMsg(null, e, "wchar_t*", t);
                break;

            case Format.p:      // double pointer
                if (!(t.ty == Tpointer && tnext.ty == Tpointer))
                    errorMsg(null, e, "void**", t);
                break;

            case Format.POSIX_ms: // pointer to pointer to char string
                // `tnext2` is only dereferenced after both pointer levels are confirmed.
                Type tnext2 = tnext ? tnext.nextOf() : null;
                if (!(t.ty == Tpointer && tnext.ty == Tpointer && (tnext2.ty == Tchar || tnext2.ty == Tint8 || tnext2.ty == Tuns8)))
                    errorMsg(null, e, "char**", t);
                break;

            case Format.POSIX_mls: // pointer to pointer to wchar_t string
                // `tnext2` is only dereferenced after both pointer levels are confirmed.
                Type tnext2 = tnext ? tnext.nextOf() : null;
                if (!(t.ty == Tpointer && tnext.ty == Tpointer && tnext2.ty.isSomeChar && tnext2.size() == target.c.wchar_tsize))
                    errorMsg(null, e, "wchar_t**", t);
                break;

            case Format.error:
                deprecation(loc, "format specifier `\"%.*s\"` is invalid", cast(int)slice.length, slice.ptr);
                break;

            // `%%` was handled above; `%m` (GNU) is only produced by the printf parser.
            case Format.GNU_m:
            case Format.percent:
                assert(0);
        }
    }
    return false;
}

private:

/**************************************
 * Parse the *format specifier* which is of the form:
 *
 * `%[*][width][length]specifier`
 *
 * Params:
 *      format = format string
 *      idx = index of `%` of start of format specifier,
 *          which gets updated to index past the end of it,
 *          even if `Format.error` is returned
 *      asterisk = set if there is a `*` sub-specifier
 * Returns:
 *      Format
 */
Format parseScanfFormatSpecifier(scope const char[] format, ref size_t idx,
        out bool asterisk) nothrow pure @safe
{
    auto i = idx;
    assert(format[i] == '%');
    const length = format.length;

    // Record how far parsing got, then signal failure.
    Format error()
    {
        idx = i;
        return Format.error;
    }

    ++i;
    if (i == length)
        return error();

    if (format[i] == '%')
    {
        idx = i + 1;
        return Format.percent;
    }

    // * sub-specifier
    if (format[i] == '*')
    {
        ++i;
        if (i == length)
            return error();
        asterisk = true;
    }

    // fieldWidth
    while (isdigit(format[i]))
    {
        i++;
        if (i == length)
            return error();
    }

    /* Read the specifier
     */
    Format specifier;
    Modifier flags = Modifier.none;
    switch (format[i])
    {
        case 'm':
            // https://pubs.opengroup.org/onlinepubs/9699919799/functions/scanf.html
            // POSIX.1-2017 C Extension (CX)
            flags = Modifier.m;
            ++i;
            if (i == length)
                return error();
            if (format[i] == 'l')
            {
                ++i;
                if (i == length)
                    return error();
                flags = Modifier.ml;
            }

            // Check valid conversion types for %m.
            if (format[i] == 'c' || format[i] == 's')
                specifier = flags == Modifier.ml ? Format.POSIX_mls :
                                                   Format.POSIX_ms;
            else if (format[i] == 'C' || format[i] == 'S')
                specifier = flags == Modifier.m ? Format.POSIX_mls :
                                                  Format.error;
            else if (format[i] == '[')
                goto case '[';
            else
                specifier = Format.error;
            ++i;
            break;

        case 'l':
            // Look for wchar_t scanset %l[..]
            immutable j = i + 1;
            if (j < length && format[j] == '[')
            {
                i = j;
                flags = Modifier.l;
                goto case '[';
            }
            goto default;

        case '[':
            // Read the scanset
            i++;
            if (i == length)
                return error();
            // If the conversion specifier begins with `[]` or `[^]`, the right
            // bracket character is not the terminator, but in the scanlist.
            if (format[i] == '^')
            {
                i++;
                if (i == length)
                    return error();
            }
            if (format[i] == ']')
            {
                i++;
                if (i == length)
                    return error();
            }
            // A scanset can be anything, so we just check that it is paired
            while (i < length)
            {
                if (format[i] == ']')
                    break;
                ++i;
            }
            // no `]` found
            if (i == length)
                return error();

            specifier = flags == Modifier.none ? Format.s         :
                        flags == Modifier.l    ? Format.ls        :
                        flags == Modifier.m    ? Format.POSIX_ms  :
                        flags == Modifier.ml   ? Format.POSIX_mls :
                                                 Format.error;
            ++i;
            break;

        default:
            char genSpec;
            specifier = parseGenericFormatSpecifier(format, i, genSpec);
            if (specifier == Format.error)
                return error();
            break;
    }

    idx = i;
    return specifier;  // success
}

/**************************************
 * Parse the *format specifier* which is of the form:
 *
 * `%[flags][field width][.precision][length modifier]specifier`
 *
 * Params:
 *      format = format string
 *      idx = index of `%` of start of format specifier,
 *          which gets updated to index past the end of it,
 *          even if `Format.error` is returned
 *      widthStar = set if * for width
 *      precisionStar = set if * for precision
 *      useGNUExts = true if parsing GNU format extensions
 * Returns:
 *      Format
 */
Format parsePrintfFormatSpecifier(scope const char[] format, ref size_t idx,
        out bool widthStar, out bool precisionStar, bool useGNUExts =
            findCondition(global.versionids, Identifier.idPool("CRuntime_Glibc"))) nothrow pure @safe
{
    auto i = idx;
    assert(format[i] == '%');
    const length = format.length;
    bool hash;          // `#` flag seen
    bool zero;          // `0` flag seen
    bool flags;         // any of `-`, `+`, ` ` seen
    bool width;         // a field width (number or `*`) was given
    bool precision;     // a `.` precision was given

    // Record how far parsing got, then signal failure.
    Format error()
    {
        idx = i;
        return Format.error;
    }

    ++i;
    if (i == length)
        return error();

    if (format[i] == '%')
    {
        idx = i + 1;
        return Format.percent;
    }

    /* Read the `flags`
     */
    while (1)
    {
        const c = format[i];
        if (c == '-' ||
            c == '+' ||
            c == ' ')
        {
            flags = true;
        }
        else if (c == '#')
        {
            hash = true;
        }
        else if (c == '0')
        {
            zero = true;
        }
        else
            break;
        ++i;
        if (i == length)
            return error();
    }

    /* Read the `field width`
     */
    {
        const c = format[i];
        if (c == '*')
        {
            width = true;
            widthStar = true;
            ++i;
            if (i == length)
                return error();
        }
        else if ('1' <= c && c <= '9')
        {
            width = true;
            ++i;
            if (i == length)
                return error();
            while ('0' <= format[i] && format[i] <= '9')
            {
                ++i;
                if (i == length)
                    return error();
            }
        }
    }

    /* Read the `precision`
     */
    if (format[i] == '.')
    {
        precision = true;
        ++i;
        if (i == length)
            return error();
        const c = format[i];
        if (c == '*')
        {
            precisionStar = true;
            ++i;
            if (i == length)
                return error();
        }
        else if ('0' <= c && c <= '9')
        {
            ++i;
            if (i == length)
                return error();
            while ('0' <= format[i] && format[i] <= '9')
            {
                ++i;
                if (i == length)
                    return error();
            }
        }
    }

    /* Read the specifier
     */
    char genSpec;
    Format specifier;
    switch (format[i])
    {
        case 'm':
            // https://www.gnu.org/software/libc/manual/html_node/Other-Output-Conversions.html
            if (useGNUExts)
            {
                specifier = Format.GNU_m;
                genSpec = format[i];
                ++i;
                break;
            }
            goto default;

        default:
            specifier = parseGenericFormatSpecifier(format, i, genSpec);
            if (specifier == Format.error)
                return error();
            break;
    }

    // Reject flag/width/precision combinations that are not allowed
    // for the conversion that was parsed.
    switch (genSpec)
    {
        case 'c':
        case 's':
        case 'C':
        case 'S':
            if (hash || zero)
                return error();
            break;

        case 'd':
        case 'i':
            if (hash)
                return error();
            break;

        case 'm':
            if (hash || zero || flags)
                return error();
            break;

        case 'n':
            if (hash || zero || precision || width || flags)
                return error();
            break;

        default:
            break;
    }

    idx = i;
    return specifier;  // success
}

/* Different kinds of conversion modifiers. */
enum Modifier
{
    none,
    h,          // short
    hh,         // char
    j,          // intmax_t
    l,          // wint_t/wchar_t
    ll,         // long long int
    L,          // long double
    m,          // char**
    ml,         // wchar_t**
    t,          // ptrdiff_t
    z           // size_t
}

/* Different kinds of formatting specifications, variations we don't
   care about are merged. (Like we don't care about the difference between
   f, e, g, a, etc.)

   For `scanf`, every format is a pointer.
 */
enum Format
{
    d,          // int
    hhd,        // signed char
    hd,         // short int
    ld,         // long int
    lld,        // long long int
    jd,         // intmax_t
    zd,         // size_t
    td,         // ptrdiff_t
    u,          // unsigned int
    hhu,        // unsigned char
    hu,         // unsigned short int
    lu,         // unsigned long int
    llu,        // unsigned long long int
    ju,         // uintmax_t
    g,          // float (scanf) / double (printf)
    lg,         // double (scanf)
    Lg,         // long double (both)
    s,          // char string (both)
    ls,         // wchar_t string (both)
    c,          // char (printf)
    lc,         // wint_t (printf)
    p,          // pointer
    n,          // pointer to int
    hhn,        // pointer to signed char
    hn,         // pointer to short
    ln,         // pointer to long int
    lln,        // pointer to long long int
    jn,         // pointer to intmax_t
    zn,         // pointer to size_t
    tn,         // pointer to ptrdiff_t
    GNU_m,      // GNU ext. : string corresponding to the error code in errno (printf)
    POSIX_ms,   // POSIX ext. : dynamically allocated char string  (scanf)
    POSIX_mls,  // POSIX ext. : dynamically allocated wchar_t string (scanf)
    percent,    // %% (i.e. no argument)
    error,      // invalid format specification
}

/**************************************
 * Parse the *length specifier* and the *specifier* of the following form:
 * `[length]specifier`
 *
 * Params:
 *      format = format string
 *      idx = index of start of format specifier,
 *          which gets updated to index past the end of it,
 *          even if `Format.error` is returned
 *      genSpecifier = Generic specifier. For instance, it will be set to `d` if the
 *           format is `hhd`.
 * Returns:
 *      Format
 */
Format parseGenericFormatSpecifier(scope const char[] format,
    ref size_t idx, out char genSpecifier) nothrow pure @safe
{
    const length = format.length;

    /* Read the `length modifier`
     */
    const lm = format[idx];
    Modifier flags;
    switch (lm)
    {
        case 'j':
        case 'z':
        case 't':
        case 'L':
            flags = lm == 'j' ? Modifier.j :
                    lm == 'z' ? Modifier.z :
                    lm == 't' ? Modifier.t :
                                Modifier.L;
            ++idx;
            if (idx == length)
                return Format.error;
            break;

        case 'h':
        case 'l':
            ++idx;
            if (idx == length)
                return Format.error;
            // A doubled letter (`hh` / `ll`) is a distinct modifier.
            if (lm == format[idx])
            {
                flags = lm == 'h' ? Modifier.hh : Modifier.ll;
                ++idx;
                if (idx == length)
                    return Format.error;
            }
            else
                flags = lm == 'h' ? Modifier.h : Modifier.l;
            break;

        default:
            flags = Modifier.none;
            break;
    }

    /* Read the `specifier`
     */
    Format specifier;
    const sc = format[idx];
    genSpecifier = sc;
    switch (sc)
    {
        case 'd':
        case 'i':
            specifier = flags == Modifier.none ? Format.d   :
                        flags == Modifier.hh   ? Format.hhd :
                        flags == Modifier.h    ? Format.hd  :
                        flags == Modifier.ll   ? Format.lld :
                        flags == Modifier.l    ? Format.ld  :
                        flags == Modifier.j    ? Format.jd  :
                        flags == Modifier.z    ? Format.zd  :
                        flags == Modifier.t    ? Format.td  :
                                                 Format.error;
            break;

        case 'u':
        case 'o':
        case 'x':
        case 'X':
            specifier = flags == Modifier.none ? Format.u   :
                        flags == Modifier.hh   ? Format.hhu :
                        flags == Modifier.h    ? Format.hu  :
                        flags == Modifier.ll   ? Format.llu :
                        flags == Modifier.l    ? Format.lu  :
                        flags == Modifier.j    ? Format.ju  :
                        flags == Modifier.z    ? Format.zd  :
                        flags == Modifier.t    ? Format.td  :
                                                 Format.error;
            break;

        case 'f':
        case 'F':
        case 'e':
        case 'E':
        case 'g':
        case 'G':
        case 'a':
        case 'A':
            specifier = flags == Modifier.none ? Format.g  :
                        flags == Modifier.L    ? Format.Lg :
                        flags == Modifier.l    ? Format.lg :
                                                 Format.error;
            break;

        case 'c':
            specifier = flags == Modifier.none ? Format.c  :
                        flags == Modifier.l    ? Format.lc :
                                                 Format.error;
            break;

        case 's':
            specifier = flags == Modifier.none ? Format.s  :
                        flags == Modifier.l    ? Format.ls :
                                                 Format.error;
            break;

        case 'p':
            specifier = flags == Modifier.none ? Format.p :
                                                 Format.error;
            break;

        case 'n':
            specifier = flags == Modifier.none ? Format.n   :
                        flags == Modifier.ll   ? Format.lln :
                        flags == Modifier.l    ? Format.ln  :
                        flags == Modifier.hh   ? Format.hhn :
                        flags == Modifier.h    ? Format.hn  :
                        flags == Modifier.j    ? Format.jn  :
                        flags == Modifier.z    ? Format.zn  :
                        flags == Modifier.t    ? Format.tn  :
                                                 Format.error;
            break;

        case 'C':
            // POSIX.1-2017 X/Open System Interfaces (XSI)
            // %C format is equivalent to %lc
            specifier = flags == Modifier.none ? Format.lc :
                                                 Format.error;
            break;

        case 'S':
            // POSIX.1-2017 X/Open System Interfaces (XSI)
            // %S format is equivalent to %ls
            specifier = flags == Modifier.none ? Format.ls :
                                                 Format.error;
            break;

        default:
            specifier = Format.error;
            break;
    }

    // The specifier character is consumed even when it is invalid.
    ++idx;
    return specifier; // success
}

@("parseGenericFormatSpecifier") unittest
{
    char genSpecifier;
    size_t idx;

    void testG(string fmtStr, Format expectedFormat, char expectedGenSpecifier)
    {
        idx = 0;
        assert(parseGenericFormatSpecifier(fmtStr, idx, genSpecifier) == expectedFormat);
        assert(genSpecifier == expectedGenSpecifier);
    }

    testG("hhd", Format.hhd, 'd');
    testG("hn", Format.hn, 'n');
    testG("ji", Format.jd, 'i');
    testG("lu", Format.lu, 'u');

    idx = 0;
    assert(parseGenericFormatSpecifier("k", idx, genSpecifier) == Format.error);
}

@("parsePrintfFormatSpecifier") unittest
{
    bool useGNUExts = false;

    size_t idx = 0;
    bool widthStar;
    bool precisionStar;

    void testP(string fmtStr, Format expectedFormat, size_t expectedIdx)
    {
        idx = 0;
        assert(parsePrintfFormatSpecifier(fmtStr, idx, widthStar, precisionStar, useGNUExts) == expectedFormat);
        assert(idx == expectedIdx);
    }

    // one for each Format
    testP("%d", Format.d, 2);
    assert(!widthStar && !precisionStar);

    testP("%ld", Format.ld, 3);
    testP("%lld", Format.lld, 4);
    testP("%jd", Format.jd, 3);
    testP("%zd", Format.zd, 3);
    testP("%td", Format.td, 3);
    testP("%g", Format.g, 2);
    testP("%Lg", Format.Lg, 3);
    testP("%p", Format.p, 2);
    testP("%n", Format.n, 2);
    testP("%ln", Format.ln, 3);
    testP("%lln", Format.lln, 4);
    testP("%hn", Format.hn, 3);
    testP("%hhn", Format.hhn, 4);
    testP("%jn", Format.jn, 3);
    testP("%zn", Format.zn, 3);
    testP("%tn", Format.tn, 3);
    testP("%c", Format.c, 2);
    testP("%lc", Format.lc, 3);
    testP("%s", Format.s, 2);
    testP("%ls", Format.ls, 3);
    testP("%%", Format.percent, 2);

    // Synonyms
    testP("%i", Format.d, 2);
    testP("%u", Format.u, 2);
    testP("%o", Format.u, 2);
    testP("%x", Format.u, 2);
    testP("%X", Format.u, 2);
    testP("%f", Format.g, 2);
    testP("%F", Format.g, 2);
    testP("%G", Format.g, 2);
    testP("%a", Format.g, 2);
    testP("%La", Format.Lg, 3);
    testP("%A", Format.g, 2);
    testP("%lg", Format.lg, 3);

    // width, precision
    testP("%*d", Format.d, 3);
    assert(widthStar && !precisionStar);

    testP("%.*d", Format.d, 4);
    assert(!widthStar && precisionStar);

    testP("%*.*d", Format.d, 5);
    assert(widthStar && precisionStar);

    // Too short formats
    foreach (s; ["%", "%-", "%+", "% ", "%#", "%0", "%*", "%1", "%19", "%.", "%.*", "%.1", "%.12",
                    "%j", "%z", "%t", "%l", "%h", "%ll", "%hh"])
    {
        testP(s, Format.error, s.length);
    }

    // Undefined format combinations
    foreach (s; ["%#d", "%llg", "%jg", "%zg", "%tg", "%hg", "%hhg",
                    "%#c", "%0c", "%jc", "%zc", "%tc", "%Lc", "%hc", "%hhc", "%llc",
                    "%#s", "%0s", "%js", "%zs", "%ts", "%Ls", "%hs", "%hhs", "%lls",
                    "%jp", "%zp", "%tp", "%Lp", "%hp", "%lp", "%hhp", "%llp",
                    "%-n", "%+n", "% n", "%#n", "%0n", "%*n", "%1n", "%19n", "%.n", "%.*n", "%.1n", "%.12n", "%Ln", "%K"])
    {
        testP(s, Format.error, s.length);
    }

    testP("%C", Format.lc, 2);
    testP("%S", Format.ls, 2);

    // GNU extensions: explicitly toggle ISO/GNU flag.
    foreach (s; ["%jm", "%zm", "%tm", "%Lm", "%hm", "%hhm", "%lm", "%llm",
                    "%#m", "%+m", "%-m", "% m", "%0m"])
    {
        useGNUExts = false;
        testP(s, Format.error, s.length);
        useGNUExts = true;
        testP(s, Format.error, s.length);
    }

    foreach (s; ["%m", "%md", "%mz", "%mc", "%mm", "%msyz", "%ml", "%mlz", "%mlc", "%mlm"])
    {
        // valid cases, all parsed as `%m`
        // GNU printf()
        useGNUExts = true;
        testP(s, Format.GNU_m, 2);

        // ISO printf()
        useGNUExts = false;
        testP(s, Format.error, 2);
    }
}

@("parseScanfFormatSpecifier") unittest
{
    size_t idx;
    bool asterisk;

    void testS(string fmtStr, Format expectedFormat, size_t expectedIdx)
    {
        idx = 0;
        assert(parseScanfFormatSpecifier(fmtStr, idx, asterisk) == expectedFormat);
        assert(idx == expectedIdx);
    }

    // one for each Format
    testS("%d", Format.d, 2);
    testS("%hhd", Format.hhd, 4);
    testS("%hd", Format.hd, 3);
    testS("%ld", Format.ld, 3);
    testS("%lld", Format.lld, 4);
    testS("%jd", Format.jd, 3);
    testS("%zd", Format.zd, 3);
    testS("%td", Format.td, 3);
    testS("%u", Format.u, 2);
    testS("%hhu", Format.hhu, 4);
    testS("%hu", Format.hu, 3);
    testS("%lu", Format.lu, 3);
    testS("%llu", Format.llu, 4);
    testS("%ju", Format.ju, 3);
    testS("%g", Format.g, 2);
    testS("%lg", Format.lg, 3);
    testS("%Lg", Format.Lg, 3);
    testS("%p", Format.p, 2);
    testS("%s", Format.s, 2);
    testS("%ls", Format.ls, 3);
    testS("%%", Format.percent, 2);

    // Synonyms
    testS("%i", Format.d, 2);
    testS("%n", Format.n, 2);

    testS("%o", Format.u, 2);
    testS("%x", Format.u, 2);
    testS("%f", Format.g, 2);
    testS("%e", Format.g, 2);
    testS("%a", Format.g, 2);
    testS("%c", Format.c, 2);

    // asterisk
    testS("%*d", Format.d, 3);
    assert(asterisk);

    testS("%9ld", Format.ld, 4);
    assert(!asterisk);

    testS("%*25984hhd", Format.hhd, 10);
    assert(asterisk);

    // scansets
    testS("%[a-zA-Z]", Format.s, 9);
    assert(!asterisk);

    testS("%*25l[a-z]", Format.ls, 10);
    assert(asterisk);

    testS("%[]]", Format.s, 4);
    assert(!asterisk);

    testS("%[^]]", Format.s, 5);
    assert(!asterisk);

    // Too short formats
    foreach (s; ["%", "% ", "%#", "%0", "%*", "%1", "%19",
                 "%j", "%z", "%t", "%l", "%h", "%ll", "%hh", "%K"])
    {
        testS(s, Format.error, s.length);
    }

    // Undefined format combinations
    foreach (s; ["%Ld", "%llg", "%jg", "%zg", "%tg", "%hg", "%hhg",
                 "%jc", "%zc", "%tc", "%Lc", "%hc", "%hhc", "%llc",
                 "%jp", "%zp", "%tp", "%Lp", "%hp", "%lp", "%hhp", "%llp",
                 "%-", "%+", "%#", "%0", "%.", "%Ln"])
    {
        testS(s, Format.error, s.length);
    }

    // Invalid scansets
    foreach (s; ["%[]", "%[^", "%[^]", "%[s", "%[0-9lld", "%[", "%l[^]"])
    {
        testS(s, Format.error, s.length);
    }

    // Posix extensions
    foreach (s; ["%jm", "%zm", "%tm", "%Lm", "%hm", "%hhm", "%lm", "%llm",
                 "%m", "%ma", "%md", "%ml", "%mm", "%mlb", "%mlj", "%mlr", "%mlz",
                 "%LC", "%lC", "%llC", "%jC", "%tC", "%hC", "%hhC", "%zC",
                 "%LS", "%lS", "%llS", "%jS", "%tS", "%hS", "%hhS", "%zS"])
    {
        testS(s, Format.error, s.length);
    }

    testS("%mc", Format.POSIX_ms, 3);
    testS("%ms", Format.POSIX_ms, 3);
    testS("%m[0-9]", Format.POSIX_ms, 7);
    testS("%mlc", Format.POSIX_mls, 4);
    testS("%mls", Format.POSIX_mls, 4);
    testS("%ml[^0-9]", Format.POSIX_mls, 9);
    testS("%mC", Format.POSIX_mls, 3);
    testS("%mS", Format.POSIX_mls, 3);

    testS("%C", Format.lc, 2);
    testS("%S", Format.ls, 2);
}