1 /**
2  * Check the arguments to `printf` and `scanf` against the `format` string.
3  *
4  * Copyright:   Copyright (C) 1999-2023 by The D Language Foundation, All Rights Reserved
5  * Authors:     $(LINK2 https://www.digitalmars.com, Walter Bright)
6  * License:     $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
7  * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/chkformat.d, _chkformat.d)
8  * Documentation:  https://dlang.org/phobos/dmd_chkformat.html
9  * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/chkformat.d
10  */
11 module dmd.chkformat;
12 
13 //import core.stdc.stdio : printf, scanf;
14 import core.stdc.ctype : isdigit;
15 
16 import dmd.astenums;
17 import dmd.cond;
18 import dmd.errors;
19 import dmd.expression;
20 import dmd.globals;
21 import dmd.identifier;
22 import dmd.location;
23 import dmd.mtype;
24 import dmd.target;
25 
26 
/******************************************
 * Check that arguments to a printf format string are compatible
 * with that string. Issue diagnostics (deprecations) for incompatibilities.
 *
 * Follows the C99 specification for printf.
 *
 * Takes a generous, rather than strict, view of compatibility.
 * For example, an unsigned value can be formatted with a signed specifier.
 *
 * Diagnosed incompatibilities are:
 *
 * 1. incompatible sizes which will cause argument misalignment
 * 2. dereferencing arguments that are not pointers
 * 3. insufficient number of arguments
 * 4. struct arguments
 * 5. array and slice arguments
 * 6. non-pointer arguments to `s` specifier
 * 7. non-standard formats
 * 8. undefined behavior per C99
 *
 * Per the C Standard, extra arguments are ignored.
 *
 * No attempt is made to fix the arguments or the format string.
 *
 * Params:
 *      loc = location for error messages
 *      format = format string
 *      args = arguments to match with format string
 *      isVa_list = if a "v" function (format check only)
 *
 * Returns:
 *      `true` if the format string consumes more arguments than were
 *      supplied (checking stops at that point); `false` otherwise.
 *      Type mismatches and invalid specifiers are diagnosed but do not
 *      change the return value.
 * References:
 * C99 7.19.6.1
 * https://www.cplusplus.com/reference/cstdio/printf/
 */
bool checkPrintfFormat(ref const Loc loc, scope const char[] format, scope Expression[] args, bool isVa_list)
{
    //printf("checkPrintFormat('%.*s')\n", cast(int)format.length, format.ptr);
    size_t n;    // index of the next unconsumed entry in args
    for (size_t i = 0; i < format.length;)
    {
        if (format[i] != '%')
        {
            ++i;
            continue;
        }
        bool widthStar;
        bool precisionStar;
        size_t j = i;
        const fmt = parsePrintfFormatSpecifier(format, j, widthStar, precisionStar);
        const slice = format[i .. j];   // text of this specifier, used in diagnostics
        i = j;

        if (fmt == Format.percent)
            continue;                   // "%%", no arguments
        if (fmt == Format.GNU_m)
            continue;                   // "%m", no arguments

        if (isVa_list)
        {
            // format check only
            if (fmt == Format.error)
                deprecation(loc, "format specifier `\"%.*s\"` is invalid", cast(int)slice.length, slice.ptr);
            continue;
        }

        // Fetch the next argument, or diagnose and return null when none is left.
        Expression getNextArg()
        {
            if (n == args.length)
            {
                deprecation(loc, "more format specifiers than %d arguments", cast(int)n);
                return null;
            }
            return args[n++];
        }

        void errorMsg(const char* prefix, Expression arg, const char* texpect, Type tactual)
        {
            deprecation(arg.loc, "%sargument `%s` for format specification `\"%.*s\"` must be `%s`, not `%s`",
                  prefix ? prefix : "", arg.toChars(), cast(int)slice.length, slice.ptr, texpect, tactual.toChars());
        }

        // Consume the `int` argument that supplies a `*` width or precision.
        // Returns false when the arguments have run out.
        bool checkStarArg(const char* prefix)
        {
            auto starArg = getNextArg();
            if (!starArg)
                return false;
            auto starType = starArg.type.toBasetype();
            if (starType.ty != Tint32 && starType.ty != Tuns32)
                errorMsg(prefix, starArg, "int", starType);
            return true;
        }

        if (widthStar && !checkStarArg("width "))
            return true;

        if (precisionStar && !checkStarArg("precision "))
            return true;

        auto e = getNextArg();
        if (!e)
            return true;
        auto t = e.type.toBasetype();
        auto tnext = t.nextOf();    // only dereferenced below after checking t.ty == Tpointer
        const c_longsize = target.c.longsize;
        const ptrsize = target.ptrsize;

        // Types which are promoted to int are allowed.
        // Spec: C99 6.5.2.2.7
        final switch (fmt)
        {
            case Format.u:      // unsigned int
            case Format.d:      // int
                if (t.ty != Tint32 && t.ty != Tuns32)
                    errorMsg(null, e, fmt == Format.u ? "uint" : "int", t);
                break;

            case Format.hhu:    // unsigned char
            case Format.hhd:    // signed char
                if (t.ty != Tint32 && t.ty != Tuns32 && t.ty != Tint8 && t.ty != Tuns8)
                    errorMsg(null, e, fmt == Format.hhu ? "ubyte" : "byte", t);
                break;

            case Format.hu:     // unsigned short int
            case Format.hd:     // short int
                if (t.ty != Tint32 && t.ty != Tuns32 && t.ty != Tint16 && t.ty != Tuns16)
                    errorMsg(null, e, fmt == Format.hu ? "ushort" : "short", t);
                break;

            case Format.lu:     // unsigned long int
            case Format.ld:     // long int
                if (!(t.isintegral() && t.size() == c_longsize))
                {
                    if (fmt == Format.lu)
                        errorMsg(null, e, (c_longsize == 4 ? "uint" : "ulong"), t);
                    else
                        errorMsg(null, e, (c_longsize == 4 ? "int" : "long"), t);
                }
                break;

            case Format.llu:    // unsigned long long int
            case Format.lld:    // long long int
                if (t.ty != Tint64 && t.ty != Tuns64)
                    errorMsg(null, e, fmt == Format.llu ? "ulong" : "long", t);
                break;

            case Format.ju:     // uintmax_t
            case Format.jd:     // intmax_t
                if (t.ty != Tint64 && t.ty != Tuns64)
                {
                    if (fmt == Format.ju)
                        errorMsg(null, e, "core.stdc.stdint.uintmax_t", t);
                    else
                        errorMsg(null, e, "core.stdc.stdint.intmax_t", t);
                }
                break;

            case Format.zd:     // size_t
                if (!(t.isintegral() && t.size() == ptrsize))
                    errorMsg(null, e, "size_t", t);
                break;

            case Format.td:     // ptrdiff_t
                if (!(t.isintegral() && t.size() == ptrsize))
                    errorMsg(null, e, "ptrdiff_t", t);
                break;

            case Format.lg:
            case Format.g:      // double
                if (t.ty != Tfloat64 && t.ty != Timaginary64)
                    errorMsg(null, e, "double", t);
                break;

            case Format.Lg:     // long double
                if (t.ty != Tfloat80 && t.ty != Timaginary80)
                    errorMsg(null, e, "real", t);
                break;

            case Format.p:      // pointer
                if (t.ty != Tpointer && t.ty != Tnull && t.ty != Tclass && t.ty != Tdelegate && t.ty != Taarray)
                    errorMsg(null, e, "void*", t);
                break;

            case Format.n:      // pointer to int
                if (!(t.ty == Tpointer && tnext.ty == Tint32))
                    errorMsg(null, e, "int*", t);
                break;

            case Format.ln:     // pointer to long int
                if (!(t.ty == Tpointer && tnext.isintegral() && tnext.size() == c_longsize))
                    errorMsg(null, e, (c_longsize == 4 ? "int*" : "long*"), t);
                break;

            case Format.lln:    // pointer to long long int
                if (!(t.ty == Tpointer && tnext.ty == Tint64))
                    errorMsg(null, e, "long*", t);
                break;

            case Format.hn:     // pointer to short
                if (!(t.ty == Tpointer && tnext.ty == Tint16))
                    errorMsg(null, e, "short*", t);
                break;

            case Format.hhn:    // pointer to signed char
                // `%hhn` stores through a `signed char*`, i.e. D `byte*`,
                // so the pointee must be Tint8 (not Tint16, which is `short`).
                if (!(t.ty == Tpointer && tnext.ty == Tint8))
                    errorMsg(null, e, "byte*", t);
                break;

            case Format.jn:     // pointer to intmax_t
                if (!(t.ty == Tpointer && tnext.ty == Tint64))
                    errorMsg(null, e, "core.stdc.stdint.intmax_t*", t);
                break;

            case Format.zn:     // pointer to size_t
                if (!(t.ty == Tpointer && tnext.isintegral() && tnext.isunsigned() && tnext.size() == ptrsize))
                    errorMsg(null, e, "size_t*", t);
                break;

            case Format.tn:     // pointer to ptrdiff_t
                if (!(t.ty == Tpointer && tnext.isintegral() && !tnext.isunsigned() && tnext.size() == ptrsize))
                    errorMsg(null, e, "ptrdiff_t*", t);
                break;

            case Format.c:      // char
                if (t.ty != Tint32 && t.ty != Tuns32)
                    errorMsg(null, e, "char", t);
                break;

            case Format.lc:     // wint_t
                if (t.ty != Tint32 && t.ty != Tuns32)
                    errorMsg(null, e, "wchar_t", t);
                break;

            case Format.s:      // pointer to char string
                if (!(t.ty == Tpointer && (tnext.ty == Tchar || tnext.ty == Tint8 || tnext.ty == Tuns8)))
                    errorMsg(null, e, "char*", t);
                break;

            case Format.ls:     // pointer to wchar_t string
                if (!(t.ty == Tpointer && tnext.ty.isSomeChar && tnext.size() == target.c.wchar_tsize))
                    errorMsg(null, e, "wchar_t*", t);
                break;

            case Format.error:
                deprecation(loc, "format specifier `\"%.*s\"` is invalid", cast(int)slice.length, slice.ptr);
                break;

            case Format.GNU_m:      // handled before argument fetching
            case Format.POSIX_ms:   // scanf only
            case Format.POSIX_mls:  // scanf only
            case Format.percent:    // handled before argument fetching
                assert(0);
        }
    }
    return false;
}
299 
/******************************************
 * Check that arguments to a scanf format string are compatible
 * with that string. Issue diagnostics (deprecations) for incompatibilities.
 *
 * Follows the C99 specification for scanf.
 *
 * Every scanf conversion stores through a pointer, so each argument must be
 * a pointer to the type the conversion writes. For the fixed-size integer
 * conversions the pointee must match the specifier exactly, including
 * signedness; `%s`, `%c` and scansets accept pointers to `char`, `byte`
 * or `ubyte`.
 *
 * Diagnosed incompatibilities are:
 *
 * 1. incompatible sizes which will cause argument misalignment
 * 2. dereferencing arguments that are not pointers
 * 3. insufficient number of arguments
 * 4. struct arguments
 * 5. array and slice arguments
 * 6. non-standard formats
 * 7. undefined behavior per C99
 *
 * Per the C Standard, extra arguments are ignored.
 *
 * No attempt is made to fix the arguments or the format string.
 *
 * Params:
 *      loc = location for error messages
 *      format = format string
 *      args = arguments to match with format string
 *      isVa_list = if a "v" function (format check only)
 *
 * Returns:
 *      `true` if the format string consumes more arguments than were
 *      supplied (checking stops at that point); `false` otherwise.
 *      Type mismatches and invalid specifiers are diagnosed but do not
 *      change the return value.
 * References:
 * C99 7.19.6.2
 * https://www.cplusplus.com/reference/cstdio/scanf/
 */
bool checkScanfFormat(ref const Loc loc, scope const char[] format, scope Expression[] args, bool isVa_list)
{
    size_t n = 0;   // index of the next unconsumed entry in args
    for (size_t i = 0; i < format.length;)
    {
        if (format[i] != '%')
        {
            ++i;
            continue;
        }
        bool asterisk;
        size_t j = i;
        const fmt = parseScanfFormatSpecifier(format, j, asterisk);
        const slice = format[i .. j];   // text of this specifier, used in diagnostics
        i = j;

        if (fmt == Format.percent || asterisk)
            continue;   // "%%", "%*": no arguments

        if (isVa_list)
        {
            // format check only
            if (fmt == Format.error)
                deprecation(loc, "format specifier `\"%.*s\"` is invalid", cast(int)slice.length, slice.ptr);
            continue;
        }

        void errorMsg(const char* prefix, Expression arg, const char* texpect, Type tactual)
        {
            deprecation(arg.loc, "%sargument `%s` for format specification `\"%.*s\"` must be `%s`, not `%s`",
                  prefix ? prefix : "", arg.toChars(), cast(int)slice.length, slice.ptr, texpect, tactual.toChars());
        }

        // Assignment-suppressed (`*`) specifiers were skipped above, so every
        // specifier reaching this point needs an argument.
        if (n == args.length)
        {
            deprecation(loc, "more format specifiers than %d arguments", cast(int)n);
            return true;
        }
        auto e = args[n++];

        auto t = e.type.toBasetype();
        auto tnext = t.nextOf();    // only dereferenced below after checking t.ty == Tpointer
        const c_longsize = target.c.longsize;
        const ptrsize = target.ptrsize;

        final switch (fmt)
        {
            case Format.n:
            case Format.d:      // pointer to int
                if (!(t.ty == Tpointer && tnext.ty == Tint32))
                    errorMsg(null, e, "int*", t);
                break;

            case Format.hhn:
            case Format.hhd:    // pointer to signed char
                // The target is a `signed char*`, i.e. D `byte*`, so the
                // pointee must be Tint8 (not Tint16, which is `short`).
                if (!(t.ty == Tpointer && tnext.ty == Tint8))
                    errorMsg(null, e, "byte*", t);
                break;

            case Format.hn:
            case Format.hd:     // pointer to short
                if (!(t.ty == Tpointer && tnext.ty == Tint16))
                    errorMsg(null, e, "short*", t);
                break;

            case Format.ln:
            case Format.ld:     // pointer to long int
                if (!(t.ty == Tpointer && tnext.isintegral() && !tnext.isunsigned() && tnext.size() == c_longsize))
                    errorMsg(null, e, (c_longsize == 4 ? "int*" : "long*"), t);
                break;

            case Format.lln:
            case Format.lld:    // pointer to long long int
                if (!(t.ty == Tpointer && tnext.ty == Tint64))
                    errorMsg(null, e, "long*", t);
                break;

            case Format.jn:
            case Format.jd:     // pointer to intmax_t
                if (!(t.ty == Tpointer && tnext.ty == Tint64))
                    errorMsg(null, e, "core.stdc.stdint.intmax_t*", t);
                break;

            case Format.zn:
            case Format.zd:     // pointer to size_t
                if (!(t.ty == Tpointer && tnext.isintegral() && tnext.isunsigned() && tnext.size() == ptrsize))
                    errorMsg(null, e, "size_t*", t);
                break;

            case Format.tn:
            case Format.td:     // pointer to ptrdiff_t
                if (!(t.ty == Tpointer && tnext.isintegral() && !tnext.isunsigned() && tnext.size() == ptrsize))
                    errorMsg(null, e, "ptrdiff_t*", t);
                break;

            case Format.u:      // pointer to unsigned int
                if (!(t.ty == Tpointer && tnext.ty == Tuns32))
                    errorMsg(null, e, "uint*", t);
                break;

            case Format.hhu:    // pointer to unsigned char
                if (!(t.ty == Tpointer && tnext.ty == Tuns8))
                    errorMsg(null, e, "ubyte*", t);
                break;

            case Format.hu:     // pointer to unsigned short int
                if (!(t.ty == Tpointer && tnext.ty == Tuns16))
                    errorMsg(null, e, "ushort*", t);
                break;

            case Format.lu:     // pointer to unsigned long int
                if (!(t.ty == Tpointer && tnext.isintegral() && tnext.isunsigned() && tnext.size() == c_longsize))
                    errorMsg(null, e, (c_longsize == 4 ? "uint*" : "ulong*"), t);
                break;

            case Format.llu:    // pointer to unsigned long long int
                if (!(t.ty == Tpointer && tnext.ty == Tuns64))
                    errorMsg(null, e, "ulong*", t);
                break;

            case Format.ju:     // pointer to uintmax_t
                if (!(t.ty == Tpointer && tnext.ty == Tuns64))
                    errorMsg(null, e, "core.stdc.stdint.uintmax_t*", t);
                break;

            case Format.g:      // pointer to float
                if (!(t.ty == Tpointer && tnext.ty == Tfloat32))
                    errorMsg(null, e, "float*", t);
                break;

            case Format.lg:     // pointer to double
                if (!(t.ty == Tpointer && tnext.ty == Tfloat64))
                    errorMsg(null, e, "double*", t);
                break;

            case Format.Lg:     // pointer to long double
                if (!(t.ty == Tpointer && tnext.ty == Tfloat80))
                    errorMsg(null, e, "real*", t);
                break;

            case Format.c:
            case Format.s:      // pointer to char string
                if (!(t.ty == Tpointer && (tnext.ty == Tchar || tnext.ty == Tint8 || tnext.ty == Tuns8)))
                    errorMsg(null, e, "char*", t);
                break;

            case Format.lc:
            case Format.ls:     // pointer to wchar_t string
                if (!(t.ty == Tpointer && tnext.ty.isSomeChar && tnext.size() == target.c.wchar_tsize))
                    errorMsg(null, e, "wchar_t*", t);
                break;

            case Format.p:      // double pointer
                if (!(t.ty == Tpointer && tnext.ty == Tpointer))
                    errorMsg(null, e, "void**", t);
                break;

            case Format.POSIX_ms: // pointer to pointer to char string
                // tnext2 is only dereferenced once both pointer levels are confirmed
                Type tnext2 = tnext ? tnext.nextOf() : null;
                if (!(t.ty == Tpointer && tnext.ty == Tpointer && (tnext2.ty == Tchar || tnext2.ty == Tint8 || tnext2.ty == Tuns8)))
                    errorMsg(null, e, "char**", t);
                break;

            case Format.POSIX_mls: // pointer to pointer to wchar_t string
                // tnext2 is only dereferenced once both pointer levels are confirmed
                Type tnext2 = tnext ? tnext.nextOf() : null;
                if (!(t.ty == Tpointer && tnext.ty == Tpointer && tnext2.ty.isSomeChar && tnext2.size() == target.c.wchar_tsize))
                    errorMsg(null, e, "wchar_t**", t);
                break;

            case Format.error:
                deprecation(loc, "format specifier `\"%.*s\"` is invalid", cast(int)slice.length, slice.ptr);
                break;

            case Format.GNU_m:      // printf only
            case Format.percent:    // handled before argument fetching
                assert(0);
        }
    }
    return false;
}
523 
524 private:
525 
/**************************************
 * Parse a single scanf *format specifier*, which has the form:
 *
 * `%[*][width][length]specifier`
 *
 * Besides the generic `[length]specifier` conversions, this recognises
 * scansets (`%[...]`, `%l[...]`) and the POSIX `m` allocation prefix
 * (`%ms`, `%mls`, `%m[...]`, ...).
 *
 * Params:
 *      format = format string
 *      idx = index of the `%` that starts the format specifier;
 *          on return it is the index just past the parsed text,
 *          even if `Format.error` is returned
 *      asterisk = set if there is a `*` (assignment suppression) sub-specifier
 * Returns:
 *      the `Format` that was recognised, or `Format.error`
 */
Format parseScanfFormatSpecifier(scope const char[] format, ref size_t idx,
        out bool asterisk) nothrow pure @safe
{
    auto pos = idx;
    assert(format[pos] == '%');
    const len = format.length;

    // Report failure, publishing how far the parse got.
    Format fail()
    {
        idx = pos;
        return Format.error;
    }

    if (++pos == len)
        return fail();

    if (format[pos] == '%')
    {
        idx = pos + 1;
        return Format.percent;
    }

    // `*` sub-specifier
    if (format[pos] == '*')
    {
        if (++pos == len)
            return fail();
        asterisk = true;
    }

    // field width
    while (isdigit(format[pos]))
    {
        if (++pos == len)
            return fail();
    }

    /* Read the specifier
     */
    Format specifier;
    Modifier modifier = Modifier.none;
    const lead = format[pos];
    bool scanset = lead == '[';

    if (lead == 'm')
    {
        // https://pubs.opengroup.org/onlinepubs/9699919799/functions/scanf.html
        // POSIX.1-2017 C Extension (CX)
        modifier = Modifier.m;
        if (++pos == len)
            return fail();
        if (format[pos] == 'l')
        {
            if (++pos == len)
                return fail();
            modifier = Modifier.ml;
        }

        // Check valid conversion types for %m.
        const conv = format[pos];
        if (conv == '[')
        {
            scanset = true;
        }
        else
        {
            if (conv == 'c' || conv == 's')
            {
                specifier = modifier == Modifier.ml ? Format.POSIX_mls
                                                    : Format.POSIX_ms;
            }
            else if (conv == 'C' || conv == 'S')
            {
                specifier = modifier == Modifier.m ? Format.POSIX_mls
                                                   : Format.error;
            }
            else
            {
                specifier = Format.error;
            }
            // The conversion character is consumed even when it is invalid.
            ++pos;
        }
    }
    else if (lead == 'l' && pos + 1 < len && format[pos + 1] == '[')
    {
        // wchar_t scanset %l[..]
        ++pos;
        modifier = Modifier.l;
        scanset = true;
    }

    if (scanset)
    {
        // `pos` is at the opening `[`; step inside the scanset.
        if (++pos == len)
            return fail();
        // If the conversion specifier begins with `[]` or `[^]`, the right
        // bracket character is not the terminator, but in the scanlist.
        if (format[pos] == '^')
        {
            if (++pos == len)
                return fail();
        }
        if (format[pos] == ']')
        {
            if (++pos == len)
                return fail();
        }
        // A scanset can be anything, so we just check that it is paired
        while (pos < len && format[pos] != ']')
            ++pos;
        // no `]` found
        if (pos == len)
            return fail();

        switch (modifier)
        {
            case Modifier.none:
                specifier = Format.s;
                break;
            case Modifier.l:
                specifier = Format.ls;
                break;
            case Modifier.m:
                specifier = Format.POSIX_ms;
                break;
            case Modifier.ml:
                specifier = Format.POSIX_mls;
                break;
            default:
                specifier = Format.error;
                break;
        }
        ++pos;  // consume the closing `]`
    }
    else if (lead != 'm')
    {
        // Everything else is an ordinary `[length]specifier` conversion.
        char genSpec;
        specifier = parseGenericFormatSpecifier(format, pos, genSpec);
        if (specifier == Format.error)
            return fail();
    }

    idx = pos;
    return specifier;  // success
}
675 
/**************************************
 * Parse a single printf *format specifier*, which has the form:
 *
 * `%[flags][field width][.precision][length modifier]specifier`
 *
 * After the specifier is recognised, flag/width/precision combinations
 * that are rejected for that conversion (e.g. `#` with `d`, anything with
 * `n`) turn the result into `Format.error`.
 *
 * Params:
 *      format = format string
 *      idx = index of the `%` that starts the format specifier;
 *          on return it is the index just past the parsed text,
 *          even if `Format.error` is returned
 *      widthStar = set if * for width
 *      precisionStar = set if * for precision
 *      useGNUExts = true if parsing GNU format extensions
 * Returns:
 *      the `Format` that was recognised, or `Format.error`
 */
Format parsePrintfFormatSpecifier(scope const char[] format, ref size_t idx,
        out bool widthStar, out bool precisionStar, bool useGNUExts =
        findCondition(global.versionids, Identifier.idPool("CRuntime_Glibc"))) nothrow pure @safe
{
    auto pos = idx;
    assert(format[pos] == '%');
    const len = format.length;
    bool sawHash;           // `#` flag
    bool sawZero;           // `0` flag
    bool sawOtherFlag;      // any of `-`, `+`, ` `
    bool hasWidth;
    bool hasPrecision;

    // Report failure, publishing how far the parse got.
    Format fail()
    {
        idx = pos;
        return Format.error;
    }

    // Advance `pos` over a run of decimal digits.
    // Returns false if the run extends to the end of `format`.
    bool skipDigits()
    {
        while ('0' <= format[pos] && format[pos] <= '9')
        {
            if (++pos == len)
                return false;
        }
        return true;
    }

    if (++pos == len)
        return fail();

    if (format[pos] == '%')
    {
        idx = pos + 1;
        return Format.percent;
    }

    /* Read the `flags`
     */
    flagScan:
    for (;;)
    {
        switch (format[pos])
        {
            case '-', '+', ' ':
                sawOtherFlag = true;
                break;
            case '#':
                sawHash = true;
                break;
            case '0':
                sawZero = true;
                break;
            default:
                break flagScan;
        }
        if (++pos == len)
            return fail();
    }

    /* Read the `field width`
     */
    if (format[pos] == '*')
    {
        hasWidth = true;
        widthStar = true;
        if (++pos == len)
            return fail();
    }
    else if ('1' <= format[pos] && format[pos] <= '9')
    {
        hasWidth = true;
        if (!skipDigits())
            return fail();
    }

    /* Read the `precision`
     */
    if (format[pos] == '.')
    {
        hasPrecision = true;
        if (++pos == len)
            return fail();
        if (format[pos] == '*')
        {
            precisionStar = true;
            if (++pos == len)
                return fail();
        }
        else if (!skipDigits())     // the digits after `.` are optional
            return fail();
    }

    /* Read the specifier
     */
    char genSpec;
    Format specifier;
    if (useGNUExts && format[pos] == 'm')
    {
        // https://www.gnu.org/software/libc/manual/html_node/Other-Output-Conversions.html
        specifier = Format.GNU_m;
        genSpec = 'm';
        ++pos;
    }
    else
    {
        specifier = parseGenericFormatSpecifier(format, pos, genSpec);
        if (specifier == Format.error)
            return fail();
    }

    /* Reject modifier combinations that are not accepted for this conversion
     */
    bool conflict;
    switch (genSpec)
    {
        case 'c', 's', 'C', 'S':
            conflict = sawHash || sawZero;
            break;

        case 'd', 'i':
            conflict = sawHash;
            break;

        case 'm':
            conflict = sawHash || sawZero || sawOtherFlag;
            break;

        case 'n':
            conflict = sawHash || sawZero || hasPrecision || hasWidth || sawOtherFlag;
            break;

        default:
            conflict = false;
            break;
    }
    if (conflict)
        return fail();

    idx = pos;
    return specifier;  // success
}
861 
/* The conversion (length) modifiers that can precede a conversion
   specifier, plus the POSIX scanf allocation prefixes `m` and `ml`.
 */
enum Modifier
{
    none,       /// no modifier
    h,          /// short
    hh,         /// char
    j,          /// intmax_t
    l,          /// wint_t/wchar_t
    ll,         /// long long int
    L,          /// long double
    m,          /// char**
    ml,         /// wchar_t**
    t,          /// ptrdiff_t
    z           /// size_t
}
877 
/* The distinct kinds of format specification the checkers care about.
   Variations that take the same argument type are merged into one member
   (for example, f, e, g, a, etc. are not told apart).

   The comments name the argument type for `printf`; for `scanf`, every
   format takes a pointer to that type.
 */
enum Format
{
    d,          /// int
    hhd,        /// signed char
    hd,         /// short int
    ld,         /// long int
    lld,        /// long long int
    jd,         /// intmax_t
    zd,         /// size_t
    td,         /// ptrdiff_t
    u,          /// unsigned int
    hhu,        /// unsigned char
    hu,         /// unsigned short int
    lu,         /// unsigned long int
    llu,        /// unsigned long long int
    ju,         /// uintmax_t
    g,          /// float (scanf) / double (printf)
    lg,         /// double (scanf)
    Lg,         /// long double (both)
    s,          /// char string (both)
    ls,         /// wchar_t string (both)
    c,          /// char (printf)
    lc,         /// wint_t (printf)
    p,          /// pointer
    n,          /// pointer to int
    hhn,        /// pointer to signed char
    hn,         /// pointer to short
    ln,         /// pointer to long int
    lln,        /// pointer to long long int
    jn,         /// pointer to intmax_t
    zn,         /// pointer to size_t
    tn,         /// pointer to ptrdiff_t
    GNU_m,      /// GNU ext. : string corresponding to the error code in errno (printf)
    POSIX_ms,   /// POSIX ext. : dynamically allocated char string  (scanf)
    POSIX_mls,  /// POSIX ext. : dynamically allocated wchar_t string (scanf)
    percent,    /// %% (i.e. no argument)
    error,      /// invalid format specification
}
922 
/**************************************
 * Parse the *length modifier* and the *conversion specifier* of the following form:
 * `[length]specifier`
 *
 * Recognized length modifiers are `hh`, `h`, `l`, `ll`, `j`, `z`, `t` and `L`.
 * A modifier/specifier pair that is not listed in the tables below is
 * reported as `Format.error`.
 *
 * Params:
 *      format = format string
 *      idx = index of the start of the `[length]specifier` part,
 *          which gets updated to the index past the end of it,
 *          even if `Format.error` is returned. It is left unchanged only
 *          when it is already at or past the end of `format` on entry.
 *      genSpecifier = Generic specifier, i.e. the conversion character without
 *          its length modifier. For instance, it will be set to `d` if the
 *          format is `hhd`. It keeps its `out` default value (`char.init`)
 *          when `format` ends before a conversion character is read.
 * Returns:
 *      the `Format` for the modifier/specifier pair, or `Format.error` when
 *      the pair is not valid or `format` ends prematurely
 */
Format parseGenericFormatSpecifier(scope const char[] format,
    ref size_t idx, out char genSpecifier) nothrow pure @safe
{
    const length = format.length;

    /* Nothing left to parse: report an error rather than indexing
     * past the end of `format`.
     */
    if (idx >= length)
        return Format.error;

    /* Read the `length modifier`
     */
    const lm = format[idx];
    Modifier flags;
    switch (lm)
    {
        // modifiers that are always a single character
        case 'j':
        case 'z':
        case 't':
        case 'L':
            flags = lm == 'j' ? Modifier.j :
                    lm == 'z' ? Modifier.z :
                    lm == 't' ? Modifier.t :
                                Modifier.L;
            ++idx;
            if (idx == length)
                return Format.error;    // modifier without a specifier
            break;

        // modifiers that may be doubled: `h`/`hh` and `l`/`ll`
        case 'h':
        case 'l':
            ++idx;
            if (idx == length)
                return Format.error;    // modifier without a specifier
            if (lm == format[idx])
            {
                flags = lm == 'h' ? Modifier.hh : Modifier.ll;
                ++idx;
                if (idx == length)
                    return Format.error;    // doubled modifier without a specifier
            }
            else
                flags = lm == 'h' ? Modifier.h : Modifier.l;
            break;

        default:
            // not a modifier: `idx` still points at the specifier character
            flags = Modifier.none;
            break;
    }

    /* Read the `specifier`
     */
    Format specifier;
    const sc = format[idx];
    genSpecifier = sc;
    switch (sc)
    {
        case 'd':
        case 'i':
            specifier = flags == Modifier.none ? Format.d   :
                        flags == Modifier.hh   ? Format.hhd :
                        flags == Modifier.h    ? Format.hd  :
                        flags == Modifier.ll   ? Format.lld :
                        flags == Modifier.l    ? Format.ld  :
                        flags == Modifier.j    ? Format.jd  :
                        flags == Modifier.z    ? Format.zd  :
                        flags == Modifier.t    ? Format.td  :
                                                 Format.error;
            break;

        case 'u':
        case 'o':
        case 'x':
        case 'X':
            // `z` and `t` reuse the signed members: `Format` has no
            // unsigned counterparts for them.
            specifier = flags == Modifier.none ? Format.u   :
                        flags == Modifier.hh   ? Format.hhu :
                        flags == Modifier.h    ? Format.hu  :
                        flags == Modifier.ll   ? Format.llu :
                        flags == Modifier.l    ? Format.lu  :
                        flags == Modifier.j    ? Format.ju  :
                        flags == Modifier.z    ? Format.zd  :
                        flags == Modifier.t    ? Format.td  :
                                                 Format.error;
            break;

        case 'f':
        case 'F':
        case 'e':
        case 'E':
        case 'g':
        case 'G':
        case 'a':
        case 'A':
            // all floating point conversions are merged into the `g` family
            specifier = flags == Modifier.none ? Format.g  :
                        flags == Modifier.L    ? Format.Lg :
                        flags == Modifier.l    ? Format.lg :
                                                 Format.error;
            break;

        case 'c':
            specifier = flags == Modifier.none ? Format.c       :
                        flags == Modifier.l    ? Format.lc      :
                                                 Format.error;
            break;

        case 's':
            specifier = flags == Modifier.none ? Format.s       :
                        flags == Modifier.l    ? Format.ls      :
                                                 Format.error;
            break;

        case 'p':
            // no length modifier is accepted with `p`
            specifier = flags == Modifier.none ? Format.p :
                                                 Format.error;
            break;

        case 'n':
            specifier = flags == Modifier.none ? Format.n   :
                        flags == Modifier.ll   ? Format.lln :
                        flags == Modifier.l    ? Format.ln  :
                        flags == Modifier.hh   ? Format.hhn :
                        flags == Modifier.h    ? Format.hn  :
                        flags == Modifier.j    ? Format.jn  :
                        flags == Modifier.z    ? Format.zn  :
                        flags == Modifier.t    ? Format.tn  :
                                                 Format.error;
            break;

        case 'C':
            // POSIX.1-2017 X/Open System Interfaces (XSI)
            // %C format is equivalent to %lc
            specifier = flags == Modifier.none ? Format.lc :
                                                 Format.error;
            break;

        case 'S':
            // POSIX.1-2017 X/Open System Interfaces (XSI)
            // %S format is equivalent to %ls
            specifier = flags == Modifier.none ? Format.ls :
                                                 Format.error;
            break;

        default:
            specifier = Format.error;
            break;
    }

    // step past the specifier character, whether or not it was valid
    ++idx;
    return specifier; // success
}
1082 
@("parseGenericFormatSpecifier") unittest
{
    // One expectation: input text, resulting Format, and the bare specifier character.
    static struct Expectation
    {
        string text;
        Format format;
        char generic;
    }

    foreach (e; [Expectation("hhd", Format.hhd, 'd'),
                 Expectation("hn",  Format.hn,  'n'),
                 Expectation("ji",  Format.jd,  'i'),
                 Expectation("lu",  Format.lu,  'u')])
    {
        size_t pos = 0;
        char generic;
        assert(parseGenericFormatSpecifier(e.text, pos, generic) == e.format);
        assert(generic == e.generic);
    }

    // An unknown specifier is rejected; only the returned Format is checked here.
    size_t start = 0;
    char unchecked;
    assert(parseGenericFormatSpecifier("k", start, unchecked) == Format.error);
}
1103 
@("parsePrintfFormatSpecifier") unittest
{
    bool gnu = false;       // GNU extension switch handed to the parser
    bool starWidth;
    bool starPrecision;

    static struct Case
    {
        string text;
        Format want;
    }

    // Parse `text` from index 0; verify the classification and where parsing stopped.
    void expectAt(string text, Format want, size_t wantEnd)
    {
        size_t pos = 0;
        assert(parsePrintfFormatSpecifier(text, pos, starWidth, starPrecision, gnu) == want);
        assert(pos == wantEnd);
    }

    // Same check, for inputs that must be consumed up to their end.
    void expectWhole(string text, Format want)
    {
        expectAt(text, want, text.length);
    }

    // Every entry must be rejected, with the index left at the end of the entry.
    void rejectAll(string[] texts)
    {
        foreach (text; texts)
            expectWhole(text, Format.error);
    }

    // one for each Format
    expectWhole("%d", Format.d);
    assert(!starWidth && !starPrecision);

    foreach (c; [Case("%ld",  Format.ld),
                 Case("%lld", Format.lld),
                 Case("%jd",  Format.jd),
                 Case("%zd",  Format.zd),
                 Case("%td",  Format.td),
                 Case("%g",   Format.g),
                 Case("%Lg",  Format.Lg),
                 Case("%p",   Format.p),
                 Case("%n",   Format.n),
                 Case("%ln",  Format.ln),
                 Case("%lln", Format.lln),
                 Case("%hn",  Format.hn),
                 Case("%hhn", Format.hhn),
                 Case("%jn",  Format.jn),
                 Case("%zn",  Format.zn),
                 Case("%tn",  Format.tn),
                 Case("%c",   Format.c),
                 Case("%lc",  Format.lc),
                 Case("%s",   Format.s),
                 Case("%ls",  Format.ls),
                 Case("%%",   Format.percent)])
    {
        expectWhole(c.text, c.want);
    }

    // Synonyms
    foreach (c; [Case("%i",  Format.d),
                 Case("%u",  Format.u),
                 Case("%o",  Format.u),
                 Case("%x",  Format.u),
                 Case("%X",  Format.u),
                 Case("%f",  Format.g),
                 Case("%F",  Format.g),
                 Case("%G",  Format.g),
                 Case("%a",  Format.g),
                 Case("%La", Format.Lg),
                 Case("%A",  Format.g),
                 Case("%lg", Format.lg)])
    {
        expectWhole(c.text, c.want);
    }

    // width, precision
    expectWhole("%*d", Format.d);
    assert(starWidth && !starPrecision);

    expectWhole("%.*d", Format.d);
    assert(!starWidth && starPrecision);

    expectWhole("%*.*d", Format.d);
    assert(starWidth && starPrecision);

    // Too short formats
    rejectAll(["%", "%-", "%+", "% ", "%#", "%0", "%*", "%1", "%19", "%.", "%.*", "%.1", "%.12",
               "%j", "%z", "%t", "%l", "%h", "%ll", "%hh"]);

    // Undefined format combinations
    rejectAll(["%#d", "%llg", "%jg", "%zg", "%tg", "%hg", "%hhg",
               "%#c", "%0c", "%jc", "%zc", "%tc", "%Lc", "%hc", "%hhc", "%llc",
               "%#s", "%0s", "%js", "%zs", "%ts", "%Ls", "%hs", "%hhs", "%lls",
               "%jp", "%zp", "%tp", "%Lp", "%hp", "%lp", "%hhp", "%llp",
               "%-n", "%+n", "% n", "%#n", "%0n", "%*n", "%1n", "%19n", "%.n", "%.*n", "%.1n", "%.12n", "%Ln", "%K"]);

    expectWhole("%C", Format.lc);
    expectWhole("%S", Format.ls);

    // GNU extensions: these are invalid both as ISO and as GNU formats.
    foreach (text; ["%jm", "%zm", "%tm", "%Lm", "%hm", "%hhm", "%lm", "%llm",
                    "%#m", "%+m", "%-m", "% m", "%0m"])
    {
        foreach (enabled; [false, true])
        {
            gnu = enabled;
            expectWhole(text, Format.error);
        }
    }

    // All of these are parsed as `%m`: parsing stops at index 2 whatever follows.
    foreach (text; ["%m", "%md", "%mz", "%mc", "%mm", "%msyz", "%ml", "%mlz", "%mlc", "%mlm"])
    {
        // GNU printf(): accepted
        gnu = true;
        expectAt(text, Format.GNU_m, 2);

        // ISO printf(): rejected
        gnu = false;
        expectAt(text, Format.error, 2);
    }
}
1211 
@("parseScanfFormatSpecifier") unittest
{
    bool suppressed;    // receives the parser's `asterisk` result

    static struct Case
    {
        string text;
        Format want;
    }

    static struct StarCase
    {
        string text;
        Format want;
        bool star;
    }

    // Parse `text` from index 0; it must be classified as `want` and consumed in full.
    void expectWhole(string text, Format want)
    {
        size_t pos = 0;
        assert(parseScanfFormatSpecifier(text, pos, suppressed) == want);
        assert(pos == text.length);
    }

    // Every entry must be rejected, with the index left at the end of the entry.
    void rejectAll(string[] texts)
    {
        foreach (text; texts)
            expectWhole(text, Format.error);
    }

    // one for each Format
    foreach (c; [Case("%d",   Format.d),
                 Case("%hhd", Format.hhd),
                 Case("%hd",  Format.hd),
                 Case("%ld",  Format.ld),
                 Case("%lld", Format.lld),
                 Case("%jd",  Format.jd),
                 Case("%zd",  Format.zd),
                 Case("%td",  Format.td),
                 Case("%u",   Format.u),
                 Case("%hhu", Format.hhu),
                 Case("%hu",  Format.hu),
                 Case("%lu",  Format.lu),
                 Case("%llu", Format.llu),
                 Case("%ju",  Format.ju),
                 Case("%g",   Format.g),
                 Case("%lg",  Format.lg),
                 Case("%Lg",  Format.Lg),
                 Case("%p",   Format.p),
                 Case("%s",   Format.s),
                 Case("%ls",  Format.ls),
                 Case("%%",   Format.percent)])
    {
        expectWhole(c.text, c.want);
    }

    // Synonyms
    foreach (c; [Case("%i", Format.d),
                 Case("%n", Format.n),
                 Case("%o", Format.u),
                 Case("%x", Format.u),
                 Case("%f", Format.g),
                 Case("%e", Format.g),
                 Case("%a", Format.g),
                 Case("%c", Format.c)])
    {
        expectWhole(c.text, c.want);
    }

    // asterisk, then scansets: also verify the reported assignment suppression
    foreach (c; [StarCase("%*d",        Format.d,   true),
                 StarCase("%9ld",       Format.ld,  false),
                 StarCase("%*25984hhd", Format.hhd, true),
                 StarCase("%[a-zA-Z]",  Format.s,   false),
                 StarCase("%*25l[a-z]", Format.ls,  true),
                 StarCase("%[]]",       Format.s,   false),
                 StarCase("%[^]]",      Format.s,   false)])
    {
        expectWhole(c.text, c.want);
        assert(suppressed == c.star);
    }

    // Too short formats
    rejectAll(["%", "% ", "%#", "%0", "%*", "%1", "%19",
               "%j", "%z", "%t", "%l", "%h", "%ll", "%hh", "%K"]);

    // Undefined format combinations
    rejectAll(["%Ld", "%llg", "%jg", "%zg", "%tg", "%hg", "%hhg",
               "%jc", "%zc", "%tc", "%Lc", "%hc", "%hhc", "%llc",
               "%jp", "%zp", "%tp", "%Lp", "%hp", "%lp", "%hhp", "%llp",
               "%-", "%+", "%#", "%0", "%.", "%Ln"]);

    // Invalid scansets
    rejectAll(["%[]", "%[^", "%[^]", "%[s", "%[0-9lld", "%[", "%l[^]"]);

    // Posix extensions: invalid uses
    rejectAll(["%jm", "%zm", "%tm", "%Lm", "%hm", "%hhm", "%lm", "%llm",
               "%m", "%ma", "%md", "%ml", "%mm", "%mlb", "%mlj", "%mlr", "%mlz",
               "%LC", "%lC", "%llC", "%jC", "%tC", "%hC", "%hhC", "%zC",
               "%LS", "%lS", "%llS", "%jS", "%tS", "%hS", "%hhS", "%zS"]);

    // Posix extensions: valid uses
    foreach (c; [Case("%mc",       Format.POSIX_ms),
                 Case("%ms",       Format.POSIX_ms),
                 Case("%m[0-9]",   Format.POSIX_ms),
                 Case("%mlc",      Format.POSIX_mls),
                 Case("%mls",      Format.POSIX_mls),
                 Case("%ml[^0-9]", Format.POSIX_mls),
                 Case("%mC",       Format.POSIX_mls),
                 Case("%mS",       Format.POSIX_mls),
                 Case("%C",        Format.lc),
                 Case("%S",        Format.ls)])
    {
        expectWhole(c.text, c.want);
    }
}