1 /**
2  * Text macro processor for Ddoc.
3  *
4  * Copyright:   Copyright (C) 1999-2023 by The D Language Foundation, All Rights Reserved
5  * Authors:     $(LINK2 https://www.digitalmars.com, Walter Bright)
6  * License:     $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
7  * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/dmacro.d, _dmacro.d)
8  * Documentation:  https://dlang.org/phobos/dmd_dmacro.html
9  * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/dmacro.d
10  */
11 
12 module dmd.dmacro;
13 
14 import core.stdc.ctype;
15 import core.stdc.string;
16 import dmd.doc;
17 import dmd.common.outbuffer;
18 import dmd.root.rmem;
19 
extern (C++) struct MacroTable
{
    /**********************************
     * Define name=text macro.
     * If macro `name` already exists, replace the text for it.
     * Params:
     *  name = name of macro
     *  text = text of macro
     */
    extern (D) void define(const(char)[] name, const(char)[] text) nothrow pure @safe
    {
        //printf("MacroTable::define('%.*s' = '%.*s')\n", cast(int)name.length, name.ptr, text.length, text.ptr);
        if (auto existing = name in mactab)
        {
            // Redefinition: keep the existing Macro object, swap only its text.
            (*existing).text = text;
            return;
        }
        mactab[name] = new Macro(name, text);
    }

    /*****************************************************
     * Look for macros in buf and expand them in place.
     * Only look at the text in buf from start to pend.
     *
     * The work is done in two passes over `buf[start .. pend]`:
     * the first replaces `$0`..`$9` and `$+` using `arg`, the second
     * replaces `$(NAME args)` invocations using the macro table.
     * Inserted text is bracketed with `\xFF{` ... `\xFF}` markers
     * (except for `$+`), and is itself scanned recursively.
     *
     * Params:
     *  buf = buffer holding the text; modified in place
     *  start = index in `buf` of the first character to examine
     *  pend = on entry, index one past the last character to examine;
     *         on success, updated to the end of the expanded region.
     *         Left untouched when `false` is returned.
     *  arg = argument text substituted for `$0`..`$9` and `$+`
     *  recursionLimit = remaining depth of nested expansions allowed
     *
     * Returns: `true` on success, `false` when the recursion limit was reached
     */
    extern (D) bool expand(ref OutBuffer buf, size_t start, ref size_t pend, const(char)[] arg, int recursionLimit) nothrow pure
    {
        version (none)
        {
            printf("Macro::expand(buf[%d..%d], arg = '%.*s')\n", start, pend, cast(int)arg.length, arg.ptr);
            printf("Buf is: '%.*s'\n", cast(int)(pend - start), buf.data + start);
        }
        // limit recursive expansion
        recursionLimit--;
        if (recursionLimit < 0)
            return false;

        size_t end = pend;
        assert(start <= end);
        assert(end <= buf.length);

        // Work on a private copy of arg; buf is rewritten below.
        // Release the copy on every exit, including the recursion-limit
        // failure paths.
        arg = memdup(arg);
        scope (exit)
            mem.xfree(cast(char*)arg.ptr);

        /* First pass - replace $0
         */
        for (size_t u = start; u + 1 < end;)
        {
            char* p = cast(char*)buf[].ptr; // buf.data is not loop invariant
            /* Look for $0, but not $$0, and replace it with arg.
             */
            if (p[u] == '$' && (isdigit(p[u + 1]) || p[u + 1] == '+'))
            {
                if (u > start && p[u - 1] == '$')
                {
                    // Don't expand $$0, but replace it with $0
                    buf.remove(u - 1, 1);
                    end--;
                    u += 1; // now u is one past the closing '1'
                    continue;
                }
                char c = p[u + 1];
                int n = (c == '+') ? -1 : c - '0';
                const(char)[] marg;
                if (n == 0)
                {
                    marg = arg;
                }
                else
                    extractArgN(arg, marg, n);
                if (marg.length == 0)
                {
                    // Just remove macro invocation
                    //printf("Replacing '$%c' with '%.*s'\n", p[u + 1], cast(int)marg.length, marg.ptr);
                    buf.remove(u, 2);
                    end -= 2;
                }
                else if (c == '+')
                {
                    // Replace '$+' with 'arg'
                    //printf("Replacing '$%c' with '%.*s'\n", p[u + 1], cast(int)marg.length, marg.ptr);
                    buf.remove(u, 2);
                    buf.insert(u, marg);
                    end += marg.length - 2;
                    // Scan replaced text for further expansion
                    size_t mend = u + marg.length;
                    const success = expand(buf, u, mend, null, recursionLimit);
                    if (!success)
                        return false;
                    // Account for any growth/shrinkage caused by the nested expansion.
                    end += mend - (u + marg.length);
                    u = mend;
                }
                else
                {
                    // Replace '$1' with '\xFF{arg\xFF}'
                    //printf("Replacing '$%c' with '\xFF{%.*s\xFF}'\n", p[u + 1], cast(int)marg.length, marg.ptr);
                    ubyte[] slice = cast(ubyte[])buf[];
                    slice[u] = 0xFF;
                    slice[u + 1] = '{';
                    buf.insert(u + 2, marg);
                    buf.insert(u + 2 + marg.length, "\xFF}");
                    end += -2 + 2 + marg.length + 2;
                    // Scan replaced text for further expansion
                    size_t mend = u + 2 + marg.length;
                    const success = expand(buf, u + 2, mend, null, recursionLimit);
                    if (!success)
                        return false;
                    end += mend - (u + 2 + marg.length);
                    u = mend;
                }
                //printf("u = %d, end = %d\n", u, end);
                //printf("#%.*s#\n", cast(int)end, &buf.data[0]);
                continue;
            }
            u++;
        }
        /* Second pass - replace other macros
         */
        for (size_t u = start; u + 4 < end;)
        {
            char* p = cast(char*)buf[].ptr; // buf.data is not loop invariant
            /* A valid start of macro expansion is $(c, where c is
             * an id start character, and not $$(c.
             */
            if (p[u] == '$' && p[u + 1] == '(' && isIdStart(p + u + 2))
            {
                //printf("\tfound macro start '%c'\n", p[u + 2]);
                char* name = p + u + 2;
                size_t namelen = 0;
                const(char)[] marg;
                size_t v;
                /* Scan forward to find end of macro name and
                 * beginning of macro argument (marg).
                 */
                for (v = u + 2; v < end; v += utfStride(p + v))
                {
                    if (!isIdTail(p + v))
                    {
                        // We've gone past the end of the macro name.
                        namelen = v - (u + 2);
                        break;
                    }
                }
                v += extractArgN(p[v .. end], marg, 0);
                assert(v <= end);
                if (v < end)
                {
                    // v is on the closing ')'
                    if (u > start && p[u - 1] == '$')
                    {
                        // Don't expand $$(NAME), but replace it with $(NAME)
                        buf.remove(u - 1, 1);
                        end--;
                        u = v; // now u is one past the closing ')'
                        continue;
                    }

                    // Heap buffer holding "NAME,args" for DDOC_UNDEFINED_MACRO.
                    // It is only read before buf is modified (marg is copied
                    // again below), so it can be released whenever this block
                    // is left.
                    char* undefArg = null;
                    scope (exit)
                    {
                        if (undefArg)
                            mem.xfree(undefArg);
                    }

                    Macro* m = search(name[0 .. namelen]);
                    if (!m)
                    {
                        immutable undef = "DDOC_UNDEFINED_MACRO";
                        m = search(undef);
                        if (m)
                        {
                            // Macro was not defined, so this is an expansion of
                            //   DDOC_UNDEFINED_MACRO. Prepend macro name to args.
                            // marg = name[ ] ~ "," ~ marg[ ];
                            if (marg.length)
                            {
                                undefArg = cast(char*)mem.xmalloc(namelen + 1 + marg.length);
                                assert(undefArg);
                                memcpy(undefArg, name, namelen);
                                undefArg[namelen] = ',';
                                memcpy(undefArg + namelen + 1, marg.ptr, marg.length);
                                marg = undefArg[0 .. marg.length + namelen + 1];
                            }
                            else
                            {
                                marg = name[0 .. namelen];
                            }
                        }
                    }
                    if (m)
                    {
                        if (m.inuse && marg.length == 0)
                        {
                            // Remove macro invocation
                            buf.remove(u, v + 1 - u);
                            end -= v + 1 - u;
                        }
                        else if (m.inuse &&
                                 ((arg.length == marg.length &&
                                   memcmp(arg.ptr, marg.ptr, arg.length) == 0) ||
                                  (arg.length + 4 == marg.length &&
                                   marg[0] == 0xFF && marg[1] == '{' &&
                                   memcmp(arg.ptr, marg.ptr + 2, arg.length) == 0 &&
                                   marg[marg.length - 2] == 0xFF && marg[marg.length - 1] == '}')))
                        {
                            /* Recursive expansion:
                             *   marg is same as arg (with blue paint added)
                             * Just leave in place.
                             */
                        }
                        else
                        {
                            //printf("\tmacro '%.*s'(%.*s) = '%.*s'\n", cast(int)m.namelen, m.name, cast(int)marg.length, marg.ptr, cast(int)m.textlen, m.text);
                            // marg may point into buf, which is about to be
                            // rewritten, so take a private copy first.
                            marg = memdup(marg);
                            // Insert replacement text
                            buf.spread(v + 1, 2 + m.text.length + 2);
                            ubyte[] slice = cast(ubyte[])buf[];
                            slice[v + 1] = 0xFF;
                            slice[v + 2] = '{';
                            slice[v + 3 .. v + 3 + m.text.length] = cast(ubyte[])m.text[];
                            slice[v + 3 + m.text.length] = 0xFF;
                            slice[v + 3 + m.text.length + 1] = '}';
                            end += 2 + m.text.length + 2;
                            // Scan replaced text for further expansion
                            m.inuse++;
                            size_t mend = v + 1 + 2 + m.text.length + 2;
                            const success = expand(buf, v + 1, mend, marg, recursionLimit);
                            // Undo the in-use mark and release the copy before
                            // looking at the result, so a failed expansion does
                            // not leave the macro marked as in use or leak marg.
                            m.inuse--;
                            mem.xfree(cast(char*)marg.ptr);
                            if (!success)
                                return false;
                            end += mend - (v + 1 + 2 + m.text.length + 2);
                            // Drop the original "$(NAME args)" text; the expansion
                            // that followed it slides down to start at u.
                            buf.remove(u, v + 1 - u);
                            end -= v + 1 - u;
                            u += mend - (v + 1);
                            //printf("u = %d, end = %d\n", u, end);
                            //printf("#%.*s#\n", cast(int)(end - u), &buf.data[u]);
                            continue;
                        }
                    }
                    else
                    {
                        // Replace $(NAME) with nothing
                        buf.remove(u, v + 1 - u);
                        end -= (v + 1 - u);
                        continue;
                    }
                }
            }
            u++;
        }
        pend = end;
        return true;
    }

  private:

    /**********************************
     * Look up a macro by name.
     * Params:
     *  name = name of macro
     * Returns: the macro's table entry, or `null` if `name` is not defined
     */
    extern (D) Macro* search(const(char)[] name) @nogc nothrow pure @safe
    {
        //printf("Macro::search(%.*s)\n", cast(int)name.length, name.ptr);
        if (auto table = name in mactab)
        {
            //printf("\tfound %d\n", table.textlen);
            return *table;
        }
        return null;
    }

    // Macro definitions keyed by macro name.
    private Macro*[const(char)[]] mactab;
}
276 
277 /* ************************************************************************ */
278 
279 private:
280 
/// A single macro definition: its name, its replacement text and an in-use counter.
struct Macro
{
    /// macro name
    const(char)[] name;
    /// macro replacement text
    const(char)[] text;
    /// macro is in use (don't expand); starts at 0
    int inuse;

    /// Build a macro from `name` and `text`; both slices are stored as given, not copied.
    this(const(char)[] name, const(char)[] text) @nogc nothrow pure @safe
    {
        this.text = text;
        this.name = name;
    }
}
293 
/************************
 * Duplicate the characters of slice p into freshly allocated, mutable storage.
 * Params:
 *      p = slice to copy
 * Returns:
 *      slice of the same length as `p`, backed by memory obtained from
 *      mem.xmalloc()
 */

char[] memdup(const(char)[] p) nothrow pure @trusted
{
    const count = p.length;
    auto storage = cast(char*)mem.xmalloc(count);
    memcpy(storage, p.ptr, count);
    return storage[0 .. count];
}
307 
/**********************************************************
 * Scan buf[] for a macro argument and report it in marg[].
 *
 * The scan stops at the right parenthesis that closes the invocation
 * (one unmatched '(' is assumed to precede buf), at the comma that ends
 * the requested argument, or at the end of buf. While scanning it:
 *  - nests parentheses,
 *  - skips over "..." and '...' strings inside HTML tags,
 *  - skips over <!-- ... --> comments,
 *  - skips over previous macro insertions (text between \xFF{ and \xFF}).
 *
 * Params:
 *      buf = source string
 *      marg = set to slice of buf[]
 *      n =     0:      get entire argument
 *              1..9:   get nth argument
 *              -1:     get 2nd through end
 * Returns:
 *      index into buf where the scan stopped (the end of marg)
 */
size_t extractArgN(const(char)[] buf, out const(char)[] marg, int n) @nogc nothrow pure
{
    const text = buf.ptr;
    const len = buf.length;

    uint depth = 1;         // open parentheses, counting the one before buf
    ubyte quote = 0;        // quote character of the string we are inside, or 0
    bool inComment = false; // inside <!-- ... -->
    bool inTag = false;     // inside an HTML tag
    uint inexp = 0;         // nesting level of \xFF{ ... \xFF} insertions
    uint argn = 0;          // number of top-level commas seen so far

    size_t pos = 0;
    size_t argStart = 0;
    bool beginArg = true;   // an argument starts at pos

    for (;;)
    {
        if (beginArg)
        {
            // Skip first space, if any, to find the start of the macro argument
            if (n != 1 && pos < len && isspace(text[pos]))
                pos++;
            argStart = pos;
            beginArg = false;
        }
        if (pos >= len)
            break;

        const char c = text[pos];
        // True when c is not inside an insertion, a string or a comment.
        const bool live = !inexp && !quote && !inComment;

        if (c == ',')
        {
            if (live && depth == 1)
            {
                argn++;
                if (argn == 1 && n == -1)
                {
                    // "2nd through end" starts right after the first comma
                    pos++;
                    beginArg = true;
                    continue;
                }
                if (argn == n)
                    break;          // this comma terminates the nth argument
                if (argn + 1 == n)
                {
                    // the nth argument starts right after this comma
                    pos++;
                    beginArg = true;
                    continue;
                }
            }
        }
        else if (c == '(')
        {
            if (live)
                depth++;
        }
        else if (c == ')')
        {
            if (live && --depth == 0)
                break;              // closing parenthesis of the invocation
        }
        else if (c == '"' || c == '\'')
        {
            if (!inexp && !inComment && inTag)
            {
                if (c == quote)
                    quote = 0;
                else if (!quote)
                    quote = c;
            }
        }
        else if (c == '<')
        {
            if (live)
            {
                if (pos + 6 < len && text[pos + 1] == '!' && text[pos + 2] == '-' && text[pos + 3] == '-')
                {
                    inComment = true;
                    pos += 3;       // step over "!--"
                }
                else if (pos + 2 < len && isalpha(text[pos + 1]))
                    inTag = true;
            }
        }
        else if (c == '>')
        {
            if (!inexp)
                inTag = false;
        }
        else if (c == '-')
        {
            if (!inexp && !quote && inComment && pos + 2 < len && text[pos + 1] == '-' && text[pos + 2] == '>')
            {
                inComment = false;
                pos += 2;           // step over "->"
            }
        }
        else if (c == 0xFF)
        {
            if (pos + 1 < len)
            {
                if (text[pos + 1] == '{')
                    inexp++;
                else if (text[pos + 1] == '}')
                    inexp--;
            }
        }
        pos++;
    }

    // "2nd through end" with no top-level comma seen: there is no 2nd argument
    if (argn == 0 && n == -1)
        marg = text[pos .. pos];
    else
        marg = text[argStart .. pos];
    //printf("extractArg%d('%.*s') = '%.*s'\n", n, cast(int)len, text, cast(int)marg.length, marg.ptr);
    return pos;
}