/**
 * Check the arguments to `printf` and `scanf` against the `format` string.
 *
 * Copyright:   Copyright (C) 1999-2023 by The D Language Foundation, All Rights Reserved
 * Authors:     $(LINK2 https://www.digitalmars.com, Walter Bright)
 * License:     $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
 * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/chkformat.d, _chkformat.d)
 * Documentation:  https://dlang.org/phobos/dmd_chkformat.html
 * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/chkformat.d
 */
module dmd.chkformat;

//import core.stdc.stdio : printf, scanf;
import core.stdc.ctype : isdigit;

import dmd.astenums;
import dmd.cond;
import dmd.errorsink;
import dmd.expression;
import dmd.globals;
import dmd.identifier;
import dmd.location;
import dmd.mtype;
import dmd.target;


/******************************************
 * Check that arguments to a printf format string are compatible
 * with that string. Incompatibilities are reported through `eSink`
 * as deprecations.
 *
 * Follows the C99 specification for printf.
 *
 * Takes a generous, rather than strict, view of compatibility.
 * For example, an unsigned value can be formatted with a signed specifier.
 *
 * Diagnosed incompatibilities are:
 *
 * 1. incompatible sizes which will cause argument misalignment
 * 2. dereferencing arguments that are not pointers
 * 3. insufficient number of arguments
 * 4. struct arguments
 * 5. array and slice arguments
 * 6. non-pointer arguments to `s` specifier
 * 7. non-standard formats
 * 8. undefined behavior per C99
 *
 * Per the C Standard, extra arguments are ignored.
 *
 * No attempt is made to fix the arguments or the format string.
 *
 * Params:
 *      loc = location for error messages
 *      format = format string
 *      args = arguments to match with format string
 *      isVa_list = if a "v" function (format check only)
 *      eSink = where the error messages go
 *
 * Returns:
 *      `true` if the format string needs more arguments than were supplied
 *      (checking stops at that point); `false` otherwise, even when other
 *      diagnostics were reported
 * References:
 * C99 7.19.6.1
 * https://www.cplusplus.com/reference/cstdio/printf/
 */
public
bool checkPrintfFormat(ref const Loc loc, scope const char[] format, scope Expression[] args, bool isVa_list, ErrorSink eSink)
{
    //printf("checkPrintFormat('%.*s')\n", cast(int)format.length, format.ptr);
    size_t n;    // index in args
    for (size_t i = 0; i < format.length;)
    {
        if (format[i] != '%')
        {
            ++i;
            continue;
        }
        bool widthStar;
        bool precisionStar;
        size_t j = i;
        const fmt = parsePrintfFormatSpecifier(format, j, widthStar, precisionStar);
        const slice = format[i .. j];   // text of this specifier, used in diagnostics
        i = j;

        if (fmt == Format.percent)
            continue;                   // "%%", no arguments
        if (fmt == Format.GNU_m)
            continue;                   // "%m", no arguments

        if (isVa_list)
        {
            // format check only
            if (fmt == Format.error)
                eSink.deprecation(loc, "format specifier `\"%.*s\"` is invalid", cast(int)slice.length, slice.ptr);
            continue;
        }

        // Fetch the next argument, or report that the format string wants
        // more arguments than were passed and return `null`.
        Expression getNextArg()
        {
            if (n == args.length)
            {
                eSink.deprecation(loc, "more format specifiers than %d arguments", cast(int)n);
                return null;
            }
            return args[n++];
        }

        void errorMsg(const char* prefix, Expression arg, const char* texpect, Type tactual)
        {
            eSink.deprecation(arg.loc, "%sargument `%s` for format specification `\"%.*s\"` must be `%s`, not `%s`",
                  prefix ? prefix : "", arg.toChars(), cast(int)slice.length, slice.ptr, texpect, tactual.toChars());
        }

        if (widthStar)
        {
            // `*` width consumes an `int` argument
            auto e = getNextArg();
            if (!e)
                return true;
            auto t = e.type.toBasetype();
            if (t.ty != Tint32 && t.ty != Tuns32)
                errorMsg("width ", e, "int", t);
        }

        if (precisionStar)
        {
            // `.*` precision consumes an `int` argument
            auto e = getNextArg();
            if (!e)
                return true;
            auto t = e.type.toBasetype();
            if (t.ty != Tint32 && t.ty != Tuns32)
                errorMsg("precision ", e, "int", t);
        }

        auto e = getNextArg();
        if (!e)
            return true;
        auto t = e.type.toBasetype();
        auto tnext = t.nextOf();        // only dereferenced after `t.ty == Tpointer` has been checked
        const c_longsize = target.c.longsize;
        const ptrsize = target.ptrsize;

        // Types which are promoted to int are allowed.
        // Spec: C99 6.5.2.2.7
        final switch (fmt)
        {
            case Format.u:      // unsigned int
            case Format.d:      // int
                if (t.ty != Tint32 && t.ty != Tuns32)
                    errorMsg(null, e, fmt == Format.u ? "uint" : "int", t);
                break;

            case Format.hhu:    // unsigned char
            case Format.hhd:    // signed char
                if (t.ty != Tint32 && t.ty != Tuns32 && t.ty != Tint8 && t.ty != Tuns8)
                    errorMsg(null, e, fmt == Format.hhu ? "ubyte" : "byte", t);
                break;

            case Format.hu:     // unsigned short int
            case Format.hd:     // short int
                if (t.ty != Tint32 && t.ty != Tuns32 && t.ty != Tint16 && t.ty != Tuns16)
                    errorMsg(null, e, fmt == Format.hu ? "ushort" : "short", t);
                break;

            case Format.lu:     // unsigned long int
            case Format.ld:     // long int
                if (!(t.isintegral() && t.size() == c_longsize))
                {
                    if (fmt == Format.lu)
                        errorMsg(null, e, (c_longsize == 4 ? "uint" : "ulong"), t);
                    else
                        errorMsg(null, e, (c_longsize == 4 ? "int" : "long"), t);
                    if (t.isintegral() && t.size() != c_longsize)
                        eSink.errorSupplemental(e.loc, "C `long` is %d bytes on your system", c_longsize);
                }
                break;

            case Format.llu:    // unsigned long long int
            case Format.lld:    // long long int
                if (t.ty != Tint64 && t.ty != Tuns64)
                    errorMsg(null, e, fmt == Format.llu ? "ulong" : "long", t);
                break;

            case Format.ju:     // uintmax_t
            case Format.jd:     // intmax_t
                if (t.ty != Tint64 && t.ty != Tuns64)
                {
                    if (fmt == Format.ju)
                        errorMsg(null, e, "core.stdc.stdint.uintmax_t", t);
                    else
                        errorMsg(null, e, "core.stdc.stdint.intmax_t", t);
                }
                break;

            case Format.zd:     // size_t
                if (!(t.isintegral() && t.size() == ptrsize))
                    errorMsg(null, e, "size_t", t);
                break;

            case Format.td:     // ptrdiff_t
                if (!(t.isintegral() && t.size() == ptrsize))
                    errorMsg(null, e, "ptrdiff_t", t);
                break;

            case Format.lg:
            case Format.g:      // double
                if (t.ty != Tfloat64 && t.ty != Timaginary64)
                    errorMsg(null, e, "double", t);
                break;

            case Format.Lg:     // long double
                if (t.ty != Tfloat80 && t.ty != Timaginary80)
                    errorMsg(null, e, "real", t);
                break;

            case Format.p:      // pointer
                if (t.ty != Tpointer && t.ty != Tnull && t.ty != Tclass && t.ty != Tdelegate && t.ty != Taarray)
                    errorMsg(null, e, "void*", t);
                break;

            case Format.n:      // pointer to int
                if (!(t.ty == Tpointer && tnext.ty == Tint32 && tnext.isMutable()))
                    errorMsg(null, e, "int*", t);
                break;

            case Format.ln:     // pointer to long int
                if (!(t.ty == Tpointer && tnext.isintegral() && tnext.size() == c_longsize))
                    errorMsg(null, e, (c_longsize == 4 ? "int*" : "long*"), t);
                break;

            case Format.lln:    // pointer to long long int
                if (!(t.ty == Tpointer && tnext.ty == Tint64))
                    errorMsg(null, e, "long*", t);
                break;

            case Format.hn:     // pointer to short
                if (!(t.ty == Tpointer && tnext.ty == Tint16))
                    errorMsg(null, e, "short*", t);
                break;

            case Format.hhn:    // pointer to signed char
                // `%hhn` stores through a `signed char*`, i.e. D `byte*` (Tint8),
                // which is also what the diagnostic asks for.
                if (!(t.ty == Tpointer && tnext.ty == Tint8))
                    errorMsg(null, e, "byte*", t);
                break;

            case Format.jn:     // pointer to intmax_t
                if (!(t.ty == Tpointer && tnext.ty == Tint64))
                    errorMsg(null, e, "core.stdc.stdint.intmax_t*", t);
                break;

            case Format.zn:     // pointer to size_t
                if (!(t.ty == Tpointer && tnext.isintegral() && tnext.isunsigned() && tnext.size() == ptrsize))
                    errorMsg(null, e, "size_t*", t);
                break;

            case Format.tn:     // pointer to ptrdiff_t
                if (!(t.ty == Tpointer && tnext.isintegral() && !tnext.isunsigned() && tnext.size() == ptrsize))
                    errorMsg(null, e, "ptrdiff_t*", t);
                break;

            case Format.c:      // char
                if (t.ty != Tint32 && t.ty != Tuns32)
                    errorMsg(null, e, "char", t);
                break;

            case Format.lc:     // wint_t
                if (t.ty != Tint32 && t.ty != Tuns32)
                    errorMsg(null, e, "wchar_t", t);
                break;

            case Format.s:      // pointer to char string
                if (!(t.ty == Tpointer && (tnext.ty == Tchar || tnext.ty == Tint8 || tnext.ty == Tuns8)))
                    errorMsg(null, e, "char*", t);
                break;

            case Format.ls:     // pointer to wchar_t string
                if (!(t.ty == Tpointer && tnext.ty.isSomeChar && tnext.size() == target.c.wchar_tsize))
                    errorMsg(null, e, "wchar_t*", t);
                break;

            case Format.error:
                eSink.deprecation(loc, "format specifier `\"%.*s\"` is invalid", cast(int)slice.length, slice.ptr);
                break;

            case Format.GNU_m:
            case Format.POSIX_ms:
            case Format.POSIX_mls:
            case Format.percent:
                // `percent` and `GNU_m` were handled above; the POSIX `%m...`
                // formats are produced only by the scanf parser.
                assert(0);
        }
    }
    return false;
}

/******************************************
 * Check that arguments to a scanf format string are compatible
 * with that string. Incompatibilities are reported through `eSink`
 * as deprecations.
 *
 * Follows the C99 specification for scanf.
 *
 * Takes a generous, rather than strict, view of compatibility.
 * For example, an unsigned value can be formatted with a signed specifier.
 *
 * Diagnosed incompatibilities are:
 *
 * 1. incompatible sizes which will cause argument misalignment
 * 2. dereferencing arguments that are not pointers
 * 3. insufficient number of arguments
 * 4. struct arguments
 * 5. array and slice arguments
 * 6. non-standard formats
 * 7. undefined behavior per C99
 *
 * Per the C Standard, extra arguments are ignored.
 *
 * No attempt is made to fix the arguments or the format string.
 *
 * Params:
 *      loc = location for error messages
 *      format = format string
 *      args = arguments to match with format string
 *      isVa_list = if a "v" function (format check only)
 *      eSink = where the error messages go
 *
 * Returns:
 *      `true` if the format string needs more arguments than were supplied
 *      (checking stops at that point); `false` otherwise, even when other
 *      diagnostics were reported
 * References:
 * C99 7.19.6.2
 * https://www.cplusplus.com/reference/cstdio/scanf/
 */
public
bool checkScanfFormat(ref const Loc loc, scope const char[] format, scope Expression[] args, bool isVa_list, ErrorSink eSink)
{
    size_t n = 0;   // index in args
    for (size_t i = 0; i < format.length;)
    {
        if (format[i] != '%')
        {
            ++i;
            continue;
        }
        bool asterisk;
        size_t j = i;
        const fmt = parseScanfFormatSpecifier(format, j, asterisk);
        const slice = format[i .. j];   // text of this specifier, used in diagnostics
        i = j;

        if (fmt == Format.percent || asterisk)
            continue;   // "%%", "%*": no arguments

        if (isVa_list)
        {
            // format check only
            if (fmt == Format.error)
                eSink.deprecation(loc, "format specifier `\"%.*s\"` is invalid", cast(int)slice.length, slice.ptr);
            continue;
        }

        // Fetch the next argument, or report that the format string wants
        // more arguments than were passed and return `null`.
        // (Assignment-suppressed `%*` specifiers never get here.)
        Expression getNextArg()
        {
            if (n == args.length)
            {
                eSink.deprecation(loc, "more format specifiers than %d arguments", cast(int)n);
                return null;
            }
            return args[n++];
        }

        void errorMsg(const char* prefix, Expression arg, const char* texpect, Type tactual)
        {
            eSink.deprecation(arg.loc, "%sargument `%s` for format specification `\"%.*s\"` must be `%s`, not `%s`",
                  prefix ? prefix : "", arg.toChars(), cast(int)slice.length, slice.ptr, texpect, tactual.toChars());
        }

        auto e = getNextArg();
        if (!e)
            return true;

        auto t = e.type.toBasetype();
        auto tnext = t.nextOf();        // only dereferenced after `t.ty == Tpointer` has been checked
        const c_longsize = target.c.longsize;
        const ptrsize = target.ptrsize;

        final switch (fmt)
        {
            case Format.n:
            case Format.d:      // pointer to int
                if (!(t.ty == Tpointer && tnext.ty == Tint32))
                    errorMsg(null, e, "int*", t);
                break;

            case Format.hhn:
            case Format.hhd:    // pointer to signed char
                // `%hhd`/`%hhn` store through a `signed char*`, i.e. D `byte*`
                // (Tint8), which is also what the diagnostic asks for.
                if (!(t.ty == Tpointer && tnext.ty == Tint8))
                    errorMsg(null, e, "byte*", t);
                break;

            case Format.hn:
            case Format.hd:     // pointer to short
                if (!(t.ty == Tpointer && tnext.ty == Tint16))
                    errorMsg(null, e, "short*", t);
                break;

            case Format.ln:
            case Format.ld:     // pointer to long int
                if (!(t.ty == Tpointer && tnext.isintegral() && !tnext.isunsigned() && tnext.size() == c_longsize))
                    errorMsg(null, e, (c_longsize == 4 ? "int*" : "long*"), t);
                break;

            case Format.lln:
            case Format.lld:    // pointer to long long int
                if (!(t.ty == Tpointer && tnext.ty == Tint64))
                    errorMsg(null, e, "long*", t);
                break;

            case Format.jn:
            case Format.jd:     // pointer to intmax_t
                if (!(t.ty == Tpointer && tnext.ty == Tint64))
                    errorMsg(null, e, "core.stdc.stdint.intmax_t*", t);
                break;

            case Format.zn:
            case Format.zd:     // pointer to size_t
                if (!(t.ty == Tpointer && tnext.isintegral() && tnext.isunsigned() && tnext.size() == ptrsize))
                    errorMsg(null, e, "size_t*", t);
                break;

            case Format.tn:
            case Format.td:     // pointer to ptrdiff_t
                if (!(t.ty == Tpointer && tnext.isintegral() && !tnext.isunsigned() && tnext.size() == ptrsize))
                    errorMsg(null, e, "ptrdiff_t*", t);
                break;

            case Format.u:      // pointer to unsigned int
                if (!(t.ty == Tpointer && tnext.ty == Tuns32))
                    errorMsg(null, e, "uint*", t);
                break;

            case Format.hhu:    // pointer to unsigned char
                if (!(t.ty == Tpointer && tnext.ty == Tuns8))
                    errorMsg(null, e, "ubyte*", t);
                break;

            case Format.hu:     // pointer to unsigned short int
                if (!(t.ty == Tpointer && tnext.ty == Tuns16))
                    errorMsg(null, e, "ushort*", t);
                break;

            case Format.lu:     // pointer to unsigned long int
                if (!(t.ty == Tpointer && tnext.isintegral() && tnext.isunsigned() && tnext.size() == c_longsize))
                    errorMsg(null, e, (c_longsize == 4 ? "uint*" : "ulong*"), t);
                break;

            case Format.llu:    // pointer to unsigned long long int
                if (!(t.ty == Tpointer && tnext.ty == Tuns64))
                    errorMsg(null, e, "ulong*", t);
                break;

            case Format.ju:     // pointer to uintmax_t
                if (!(t.ty == Tpointer && tnext.ty == Tuns64))
                    errorMsg(null, e, "core.stdc.stdint.uintmax_t*", t);
                break;

            case Format.g:      // pointer to float
                if (!(t.ty == Tpointer && tnext.ty == Tfloat32))
                    errorMsg(null, e, "float*", t);
                break;

            case Format.lg:     // pointer to double
                if (!(t.ty == Tpointer && tnext.ty == Tfloat64))
                    errorMsg(null, e, "double*", t);
                break;

            case Format.Lg:     // pointer to long double
                if (!(t.ty == Tpointer && tnext.ty == Tfloat80))
                    errorMsg(null, e, "real*", t);
                break;

            case Format.c:
            case Format.s:      // pointer to char string
                if (!(t.ty == Tpointer && (tnext.ty == Tchar || tnext.ty == Tint8 || tnext.ty == Tuns8)))
                    errorMsg(null, e, "char*", t);
                break;

            case Format.lc:
            case Format.ls:     // pointer to wchar_t string
                if (!(t.ty == Tpointer && tnext.ty.isSomeChar && tnext.size() == target.c.wchar_tsize))
                    errorMsg(null, e, "wchar_t*", t);
                break;

            case Format.p:      // double pointer
                if (!(t.ty == Tpointer && tnext.ty == Tpointer))
                    errorMsg(null, e, "void**", t);
                break;

            case Format.POSIX_ms: // pointer to pointer to char string
                // tnext2 is only dereferenced once both pointer levels have been confirmed
                Type tnext2 = tnext ? tnext.nextOf() : null;
                if (!(t.ty == Tpointer && tnext.ty == Tpointer && (tnext2.ty == Tchar || tnext2.ty == Tint8 || tnext2.ty == Tuns8)))
                    errorMsg(null, e, "char**", t);
                break;

            case Format.POSIX_mls: // pointer to pointer to wchar_t string
                // tnext2 is only dereferenced once both pointer levels have been confirmed
                Type tnext2 = tnext ? tnext.nextOf() : null;
                if (!(t.ty == Tpointer && tnext.ty == Tpointer && tnext2.ty.isSomeChar && tnext2.size() == target.c.wchar_tsize))
                    errorMsg(null, e, "wchar_t**", t);
                break;

            case Format.error:
                eSink.deprecation(loc, "format specifier `\"%.*s\"` is invalid", cast(int)slice.length, slice.ptr);
                break;

            case Format.GNU_m:
            case Format.percent:
                // `percent` was handled above; `GNU_m` is produced only by the printf parser.
                assert(0);
        }
    }
    return false;
}

/*****************************************************************************************************/

private:

/**************************************
 * Parse the *format specifier* which is of the form:
 *
 * `%[*][width][length]specifier`
 *
 * Params:
 *      format = format string
 *      idx = index of `%` of start of format specifier,
 *          which gets updated to index past the end of it,
 *          even if `Format.error` is returned
 *      asterisk = set if there is a `*` sub-specifier
 * Returns:
 *      Format
 */
Format parseScanfFormatSpecifier(scope const char[] format, ref size_t idx,
        out bool asterisk) nothrow pure @safe
{
    auto i = idx;
    assert(format[i] == '%');
    const length = format.length;

    // Publish how far parsing got before reporting failure.
    Format error()
    {
        idx = i;
        return Format.error;
    }

    ++i;
    if (i == length)
        return error();

    if (format[i] == '%')
    {
        idx = i + 1;
        return Format.percent;
    }

    // * sub-specifier
    if (format[i] == '*')
    {
        ++i;
        if (i == length)
            return error();
        asterisk = true;
    }

    // fieldWidth
    while (isdigit(format[i]))
    {
        i++;
        if (i == length)
            return error();
    }

    /* Read the specifier
     */
    Format specifier;
    Modifier flags = Modifier.none;
    switch (format[i])
    {
        case 'm':
            // https://pubs.opengroup.org/onlinepubs/9699919799/functions/scanf.html
            // POSIX.1-2017 C Extension (CX)
            flags = Modifier.m;
            ++i;
            if (i == length)
                return error();
            if (format[i] == 'l')
            {
                ++i;
                if (i == length)
                    return error();
                flags = Modifier.ml;
            }

            // Check valid conversion types for %m.
            if (format[i] == 'c' || format[i] == 's')
                specifier = flags == Modifier.ml ? Format.POSIX_mls :
                                                   Format.POSIX_ms;
            else if (format[i] == 'C' || format[i] == 'S')
                specifier = flags == Modifier.m  ? Format.POSIX_mls :
                                                   Format.error;
            else if (format[i] == '[')
                goto case '[';
            else
                specifier = Format.error;
            ++i;
            break;

        case 'l':
            // Look for wchar_t scanset %l[..]
            immutable j = i + 1;
            if (j < length && format[j] == '[')
            {
                i = j;
                flags = Modifier.l;
                goto case '[';
            }
            goto default;

        case '[':
            // Read the scanset
            i++;
            if (i == length)
                return error();
            // If the conversion specifier begins with `[]` or `[^]`, the right
            // bracket character is not the terminator, but in the scanlist.
            if (format[i] == '^')
            {
                i++;
                if (i == length)
                    return error();
            }
            if (format[i] == ']')
            {
                i++;
                if (i == length)
                    return error();
            }
            // A scanset can be anything, so we just check that it is paired
            while (i < length)
            {
                if (format[i] == ']')
                    break;
                ++i;
            }
            // no `]` found
            if (i == length)
                return error();

            specifier = flags == Modifier.none ? Format.s         :
                        flags == Modifier.l    ? Format.ls        :
                        flags == Modifier.m    ? Format.POSIX_ms  :
                        flags == Modifier.ml   ? Format.POSIX_mls :
                                                 Format.error;
            ++i;
            break;

        default:
            char genSpec;
            specifier = parseGenericFormatSpecifier(format, i, genSpec);
            if (specifier == Format.error)
                return error();
            break;
    }

    idx = i;
    return specifier;  // success
}

/**************************************
 * Parse the *format specifier* which is of the form:
 *
 * `%[flags][field width][.precision][length modifier]specifier`
 *
 * Params:
 *      format = format string
 *      idx = index of `%` of start of format specifier,
 *          which gets updated to index past the end of it,
 *          even if `Format.error` is returned
 *      widthStar = set if * for width
 *      precisionStar = set if * for precision
 *      useGNUExts = true if parsing GNU format extensions
 * Returns:
 *      Format
 */
Format parsePrintfFormatSpecifier(scope const char[] format, ref size_t idx,
        out bool widthStar, out bool precisionStar, bool useGNUExts =
        findCondition(global.versionids, Identifier.idPool("CRuntime_Glibc"))) nothrow pure @safe
{
    auto i = idx;
    assert(format[i] == '%');
    const length = format.length;
    bool hash;
    bool zero;
    bool flags;
    bool width;
    bool precision;

    // Publish how far parsing got before reporting failure.
    Format error()
    {
        idx = i;
        return Format.error;
    }

    ++i;
    if (i == length)
        return error();

    if (format[i] == '%')
    {
        idx = i + 1;
        return Format.percent;
    }

    /* Read the `flags`
     */
    while (1)
    {
        const c = format[i];
        if (c == '-' ||
            c == '+' ||
            c == ' ')
        {
            flags = true;
        }
        else if (c == '#')
        {
            hash = true;
        }
        else if (c == '0')
        {
            zero = true;
        }
        else
            break;
        ++i;
        if (i == length)
            return error();
    }

    /* Read the `field width`
     */
    {
        const c = format[i];
        if (c == '*')
        {
            width = true;
            widthStar = true;
            ++i;
            if (i == length)
                return error();
        }
        else if ('1' <= c && c <= '9')
        {
            width = true;
            ++i;
            if (i == length)
                return error();
            while ('0' <= format[i] && format[i] <= '9')
            {
                ++i;
                if (i == length)
                    return error();
            }
        }
    }

    /* Read the `precision`
     */
    if (format[i] == '.')
    {
        precision = true;
        ++i;
        if (i == length)
            return error();
        const c = format[i];
        if (c == '*')
        {
            precisionStar = true;
            ++i;
            if (i == length)
                return error();
        }
        else if ('0' <= c && c <= '9')
        {
            ++i;
            if (i == length)
                return error();
            while ('0' <= format[i] && format[i] <= '9')
            {
                ++i;
                if (i == length)
                    return error();
            }
        }
    }

    /* Read the specifier
     */
    char genSpec;
    Format specifier;
    switch (format[i])
    {
        case 'm':
            // https://www.gnu.org/software/libc/manual/html_node/Other-Output-Conversions.html
            if (useGNUExts)
            {
                specifier = Format.GNU_m;
                genSpec = format[i];
                ++i;
                break;
            }
            goto default;

        default:
            specifier = parseGenericFormatSpecifier(format, i, genSpec);
            if (specifier == Format.error)
                return error();
            break;
    }

    // Reject flag/width/precision combinations that are not allowed
    // for the conversion that was just parsed.
    switch (genSpec)
    {
        case 'c':
        case 's':
        case 'C':
        case 'S':
            if (hash || zero)
                return error();
            break;

        case 'd':
        case 'i':
            if (hash)
                return error();
            break;

        case 'm':
            if (hash || zero || flags)
                return error();
            break;

        case 'n':
            if (hash || zero || precision || width || flags)
                return error();
            break;

        default:
            break;
    }

    idx = i;
    return specifier;  // success
}

/* Different kinds of conversion modifiers. */
enum Modifier
{
    none,
    h,          // short
    hh,         // char
    j,          // intmax_t
    l,          // wint_t/wchar_t
    ll,         // long long int
    L,          // long double
    m,          // char**
    ml,         // wchar_t**
    t,          // ptrdiff_t
    z           // size_t
}

/* Different kinds of formatting specifications, variations we don't
   care about are merged. (Like we don't care about the difference between
   f, e, g, a, etc.)

   For `scanf`, every format is a pointer.
 */
enum Format
{
    d,          // int
    hhd,        // signed char
    hd,         // short int
    ld,         // long int
    lld,        // long long int
    jd,         // intmax_t
    zd,         // size_t
    td,         // ptrdiff_t
    u,          // unsigned int
    hhu,        // unsigned char
    hu,         // unsigned short int
    lu,         // unsigned long int
    llu,        // unsigned long long int
    ju,         // uintmax_t
    g,          // float (scanf) / double (printf)
    lg,         // double (scanf)
    Lg,         // long double (both)
    s,          // char string (both)
    ls,         // wchar_t string (both)
    c,          // char (printf)
    lc,         // wint_t (printf)
    p,          // pointer
    n,          // pointer to int
    hhn,        // pointer to signed char
    hn,         // pointer to short
    ln,         // pointer to long int
    lln,        // pointer to long long int
    jn,         // pointer to intmax_t
    zn,         // pointer to size_t
    tn,         // pointer to ptrdiff_t
    GNU_m,      // GNU ext. : string corresponding to the error code in errno (printf)
    POSIX_ms,   // POSIX ext. : dynamically allocated char string  (scanf)
    POSIX_mls,  // POSIX ext. : dynamically allocated wchar_t string (scanf)
    percent,    // %% (i.e. no argument)
    error,      // invalid format specification
}

/**************************************
 * Parse the *length specifier* and the *specifier* of the following form:
 * `[length]specifier`
 *
 * Params:
 *      format = format string
 *      idx = index of start of format specifier,
 *          which gets updated to index past the end of it,
 *          even if `Format.error` is returned
 *      genSpecifier = Generic specifier. For instance, it will be set to `d` if the
 *           format is `hhd`.
 * Returns:
 *      Format
 */
Format parseGenericFormatSpecifier(scope const char[] format,
    ref size_t idx, out char genSpecifier) nothrow pure @safe
{
    const length = format.length;

    /* Read the `length modifier`
     */
    const lm = format[idx];
    Modifier flags;
    switch (lm)
    {
        case 'j':
        case 'z':
        case 't':
        case 'L':
            flags = lm == 'j' ? Modifier.j :
                    lm == 'z' ? Modifier.z :
                    lm == 't' ? Modifier.t :
                                Modifier.L;
            ++idx;
            if (idx == length)
                return Format.error;
            break;

        case 'h':
        case 'l':
            ++idx;
            if (idx == length)
                return Format.error;
            // a doubled letter means `hh` / `ll`
            if (lm == format[idx])
            {
                flags = lm == 'h' ? Modifier.hh : Modifier.ll;
                ++idx;
                if (idx == length)
                    return Format.error;
            }
            else
                flags = lm == 'h' ? Modifier.h : Modifier.l;
            break;

        default:
            flags = Modifier.none;
            break;
    }

    /* Read the `specifier`
     */
    Format specifier;
    const sc = format[idx];
    genSpecifier = sc;
    switch (sc)
    {
        case 'd':
        case 'i':
            specifier = flags == Modifier.none ? Format.d   :
                        flags == Modifier.hh   ? Format.hhd :
                        flags == Modifier.h    ? Format.hd  :
                        flags == Modifier.ll   ? Format.lld :
                        flags == Modifier.l    ? Format.ld  :
                        flags == Modifier.j    ? Format.jd  :
                        flags == Modifier.z    ? Format.zd  :
                        flags == Modifier.t    ? Format.td  :
                                                 Format.error;
            break;

        case 'u':
        case 'o':
        case 'x':
        case 'X':
            specifier = flags == Modifier.none ? Format.u   :
                        flags == Modifier.hh   ? Format.hhu :
                        flags == Modifier.h    ? Format.hu  :
                        flags == Modifier.ll   ? Format.llu :
                        flags == Modifier.l    ? Format.lu  :
                        flags == Modifier.j    ? Format.ju  :
                        flags == Modifier.z    ? Format.zd  :
                        flags == Modifier.t    ? Format.td  :
                                                 Format.error;
            break;

        case 'f':
        case 'F':
        case 'e':
        case 'E':
        case 'g':
        case 'G':
        case 'a':
        case 'A':
            specifier = flags == Modifier.none ? Format.g  :
                        flags == Modifier.L    ? Format.Lg :
                        flags == Modifier.l    ? Format.lg :
                                                 Format.error;
            break;

        case 'c':
            specifier = flags == Modifier.none ? Format.c  :
                        flags == Modifier.l    ? Format.lc :
                                                 Format.error;
            break;

        case 's':
            specifier = flags == Modifier.none ? Format.s  :
                        flags == Modifier.l    ? Format.ls :
                                                 Format.error;
            break;

        case 'p':
            specifier = flags == Modifier.none ? Format.p :
                                                 Format.error;
            break;

        case 'n':
            specifier = flags == Modifier.none ? Format.n   :
                        flags == Modifier.ll   ? Format.lln :
                        flags == Modifier.l    ? Format.ln  :
                        flags == Modifier.hh   ? Format.hhn :
                        flags == Modifier.h    ? Format.hn  :
                        flags == Modifier.j    ? Format.jn  :
                        flags == Modifier.z    ? Format.zn  :
                        flags == Modifier.t    ? Format.tn  :
                                                 Format.error;
            break;

        case 'C':
            // POSIX.1-2017 X/Open System Interfaces (XSI)
            // %C format is equivalent to %lc
            specifier = flags == Modifier.none ? Format.lc :
                                                 Format.error;
            break;

        case 'S':
            // POSIX.1-2017 X/Open System Interfaces (XSI)
            // %S format is equivalent to %ls
            specifier = flags == Modifier.none ? Format.ls :
                                                 Format.error;
            break;

        default:
            specifier = Format.error;
            break;
    }

    ++idx;
    return specifier; // success
}

@("parseGenericFormatSpecifier") unittest
{
    char genSpecifier;
    size_t idx;

    void testG(string fmtStr, Format expectedFormat, char expectedGenSpecifier)
    {
        idx = 0;
        assert(parseGenericFormatSpecifier(fmtStr, idx, genSpecifier) == expectedFormat);
        assert(genSpecifier == expectedGenSpecifier);
    }

    testG("hhd", Format.hhd, 'd');
    testG("hn", Format.hn, 'n');
    testG("ji", Format.jd, 'i');
    testG("lu", Format.lu, 'u');

    idx = 0;
    assert(parseGenericFormatSpecifier("k", idx, genSpecifier) == Format.error);
}

@("parsePrintfFormatSpecifier") unittest
{
    bool useGNUExts = false;

    size_t idx = 0;
    bool widthStar;
    bool precisionStar;

    void testP(string fmtStr, Format expectedFormat, size_t expectedIdx)
    {
        idx = 0;
        assert(parsePrintfFormatSpecifier(fmtStr, idx, widthStar, precisionStar, useGNUExts) == expectedFormat);
        assert(idx == expectedIdx);
    }

    // one for each Format
    testP("%d", Format.d, 2);
    assert(!widthStar && !precisionStar);

    testP("%ld", Format.ld, 3);
    testP("%lld", Format.lld, 4);
    testP("%jd", Format.jd, 3);
    testP("%zd", Format.zd, 3);
    testP("%td", Format.td, 3);
    testP("%g", Format.g, 2);
    testP("%Lg", Format.Lg, 3);
    testP("%p", Format.p, 2);
    testP("%n", Format.n, 2);
    testP("%ln", Format.ln, 3);
    testP("%lln", Format.lln, 4);
    testP("%hn", Format.hn, 3);
    testP("%hhn", Format.hhn, 4);
    testP("%jn", Format.jn, 3);
    testP("%zn", Format.zn, 3);
    testP("%tn", Format.tn, 3);
    testP("%c", Format.c, 2);
    testP("%lc", Format.lc, 3);
    testP("%s", Format.s, 2);
    testP("%ls", Format.ls, 3);
    testP("%%", Format.percent, 2);

    // Synonyms
    testP("%i", Format.d, 2);
    testP("%u", Format.u, 2);
    testP("%o", Format.u, 2);
    testP("%x", Format.u, 2);
    testP("%X", Format.u, 2);
    testP("%f", Format.g, 2);
    testP("%F", Format.g, 2);
    testP("%G", Format.g, 2);
    testP("%a", Format.g, 2);
    testP("%La", Format.Lg, 3);
    testP("%A", Format.g, 2);
    testP("%lg", Format.lg, 3);

    // width, precision
    testP("%*d", Format.d, 3);
    assert(widthStar && !precisionStar);

    testP("%.*d", Format.d, 4);
    assert(!widthStar && precisionStar);

    testP("%*.*d", Format.d, 5);
    assert(widthStar && precisionStar);

    // Too short formats
    foreach (s; ["%", "%-", "%+", "% ", "%#", "%0", "%*", "%1", "%19", "%.", "%.*", "%.1", "%.12",
                    "%j", "%z", "%t", "%l", "%h", "%ll", "%hh"])
    {
        testP(s, Format.error, s.length);
    }

    // Undefined format combinations
    foreach (s; ["%#d", "%llg", "%jg", "%zg", "%tg", "%hg", "%hhg",
                    "%#c", "%0c", "%jc", "%zc", "%tc", "%Lc", "%hc", "%hhc", "%llc",
                    "%#s", "%0s", "%js", "%zs", "%ts", "%Ls", "%hs", "%hhs", "%lls",
                    "%jp", "%zp", "%tp", "%Lp", "%hp", "%lp", "%hhp", "%llp",
                    "%-n", "%+n", "% n", "%#n", "%0n", "%*n", "%1n", "%19n", "%.n", "%.*n", "%.1n", "%.12n", "%Ln", "%K"])
    {
        testP(s, Format.error, s.length);
    }

    testP("%C", Format.lc, 2);
    testP("%S", Format.ls, 2);

    // GNU extensions: explicitly toggle ISO/GNU flag.
    foreach (s; ["%jm", "%zm", "%tm", "%Lm", "%hm", "%hhm", "%lm", "%llm",
                    "%#m", "%+m", "%-m", "% m", "%0m"])
    {
        useGNUExts = false;
        testP(s, Format.error, s.length);
        useGNUExts = true;
        testP(s, Format.error, s.length);
    }

    foreach (s; ["%m", "%md", "%mz", "%mc", "%mm", "%msyz", "%ml", "%mlz", "%mlc", "%mlm"])
    {
        // valid cases, all parsed as `%m`
        // GNU printf()
        useGNUExts = true;
        testP(s, Format.GNU_m, 2);

        // ISO printf()
        useGNUExts = false;
        testP(s, Format.error, 2);
    }
}

@("parseScanfFormatSpecifier") unittest
{
    size_t idx;
    bool asterisk;

    void testS(string fmtStr, Format expectedFormat, size_t expectedIdx)
    {
        idx = 0;
        assert(parseScanfFormatSpecifier(fmtStr, idx, asterisk) == expectedFormat);
        assert(idx == expectedIdx);
    }

    // one for each Format
    testS("%d", Format.d, 2);
    testS("%hhd", Format.hhd, 4);
    testS("%hd", Format.hd, 3);
    testS("%ld", Format.ld, 3);
    testS("%lld", Format.lld, 4);
    testS("%jd", Format.jd, 3);
    testS("%zd", Format.zd, 3);
    testS("%td", Format.td, 3);
    testS("%u", Format.u, 2);
    testS("%hhu", Format.hhu, 4);
    testS("%hu", Format.hu, 3);
    testS("%lu", Format.lu, 3);
    testS("%llu", Format.llu, 4);
    testS("%ju", Format.ju, 3);
    testS("%g", Format.g, 2);
    testS("%lg", Format.lg, 3);
    testS("%Lg", Format.Lg, 3);
    testS("%p", Format.p, 2);
    testS("%s", Format.s, 2);
    testS("%ls", Format.ls, 3);
    testS("%%", Format.percent, 2);

    // Synonyms
    testS("%i", Format.d, 2);
    testS("%n", Format.n, 2);

    testS("%o", Format.u, 2);
    testS("%x", Format.u, 2);
    testS("%f", Format.g, 2);
    testS("%e", Format.g, 2);
    testS("%a", Format.g, 2);
    testS("%c", Format.c, 2);

    // asterisk
    testS("%*d", Format.d, 3);
    assert(asterisk);

    testS("%9ld", Format.ld, 4);
    assert(!asterisk);

    testS("%*25984hhd", Format.hhd, 10);
    assert(asterisk);

    // scansets
    testS("%[a-zA-Z]", Format.s, 9);
    assert(!asterisk);

    testS("%*25l[a-z]", Format.ls, 10);
    assert(asterisk);

    testS("%[]]", Format.s, 4);
    assert(!asterisk);

    testS("%[^]]", Format.s, 5);
    assert(!asterisk);

    // Too short formats
    foreach (s; ["%", "% ", "%#", "%0", "%*", "%1", "%19",
                 "%j", "%z", "%t", "%l", "%h", "%ll", "%hh", "%K"])
    {
        testS(s, Format.error, s.length);
    }

    // Undefined format combinations
    foreach (s; ["%Ld", "%llg", "%jg", "%zg", "%tg", "%hg", "%hhg",
                 "%jc", "%zc", "%tc", "%Lc", "%hc", "%hhc", "%llc",
                 "%jp", "%zp", "%tp", "%Lp", "%hp", "%lp", "%hhp", "%llp",
                 "%-", "%+", "%#", "%0", "%.", "%Ln"])
    {
        testS(s, Format.error, s.length);
    }

    // Invalid scansets
    foreach (s; ["%[]", "%[^", "%[^]", "%[s", "%[0-9lld", "%[", "%l[^]"])
    {
        testS(s, Format.error, s.length);
    }

    // Posix extensions
    foreach (s; ["%jm", "%zm", "%tm", "%Lm", "%hm", "%hhm", "%lm", "%llm",
                 "%m", "%ma", "%md", "%ml", "%mm", "%mlb", "%mlj", "%mlr", "%mlz",
                 "%LC", "%lC", "%llC", "%jC", "%tC", "%hC", "%hhC", "%zC",
                 "%LS", "%lS", "%llS", "%jS", "%tS", "%hS", "%hhS", "%zS"])
    {
        testS(s, Format.error, s.length);
    }

    testS("%mc", Format.POSIX_ms, 3);
    testS("%ms", Format.POSIX_ms, 3);
    testS("%m[0-9]", Format.POSIX_ms, 7);
    testS("%mlc", Format.POSIX_mls, 4);
    testS("%mls", Format.POSIX_mls, 4);
    testS("%ml[^0-9]", Format.POSIX_mls, 9);
    testS("%mC", Format.POSIX_mls, 3);
    testS("%mS", Format.POSIX_mls, 3);

    testS("%C", Format.lc, 2);
    testS("%S", Format.ls, 2);
}