/**
 * Defines a package and module.
 *
 * Specification: $(LINK2 https://dlang.org/spec/module.html, Modules)
 *
 * Copyright:   Copyright (C) 1999-2023 by The D Language Foundation, All Rights Reserved
 * Authors:     $(LINK2 https://www.digitalmars.com, Walter Bright)
 * License:     $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
 * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/dmodule.d, _dmodule.d)
 * Documentation:  https://dlang.org/phobos/dmd_dmodule.html
 * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/dmodule.d
 */

module dmd.dmodule;

import core.stdc.stdio;
import core.stdc.stdlib;
import core.stdc.string;
import dmd.aggregate;
import dmd.arraytypes;
import dmd.astcodegen;
import dmd.astenums;
import dmd.compiler;
import dmd.gluelayer;
import dmd.dimport;
import dmd.dmacro;
import dmd.doc;
import dmd.dscope;
import dmd.dsymbol;
import dmd.dsymbolsem;
import dmd.errors;
import dmd.errorsink;
import dmd.expression;
import dmd.expressionsem;
import dmd.file_manager;
import dmd.globals;
import dmd.id;
import dmd.identifier;
import dmd.location;
import dmd.parse;
import dmd.cparse;
import dmd.root.array;
import dmd.root.file;
import dmd.root.filename;
import dmd.common.outbuffer;
import dmd.root.port;
import dmd.root.rmem;
import dmd.rootobject;
import dmd.root.string;
import dmd.semantic2;
import dmd.semantic3;
import dmd.target;
import dmd.utils;
import dmd.visitor;

// `MARS` is defined only when neither IN_GCC nor IN_LLVM is set.
version (IN_GCC) {}
else version (IN_LLVM) {}
else version = MARS;

/**
 * Calls semantic3 on a module and then, recursively, on its imports.
 *
 * Returns immediately when `m` is null or when `m.semanticRun` is already
 * past `PASS.semantic3`.
 *
 * Params:
 *      m = the module to process; may be null
 */
void semantic3OnDependencies(Module m)
{
    if (!m)
        return;

    if (m.semanticRun > PASS.semantic3)
        return;

    m.semantic3(null);

    // NOTE(review): the walk starts at index 1, so aimports[0] is never
    // visited here - presumably intentional; confirm against the callers.
    foreach (i; 1 .. m.aimports.length)
        semantic3OnDependencies(m.aimports[i]);
}

/**
 * Remove generated .di files on error and exit
 *
 * Files are only removed when `params.dihdr.doOutput` is set; modules whose
 * `filetype` is `FileType.dhdr` are skipped. `fatal()` is called in every case.
 *
 * Params:
 *      params = compiler parameters (only `dihdr.doOutput` is read)
 *      modules = the modules whose `hdrfile` is to be removed
 */
void removeHdrFilesAndFail(ref Param params, ref Modules modules) nothrow
{
    if (params.dihdr.doOutput)
    {
        foreach (m; modules)
        {
            if (m.filetype == FileType.dhdr)
                continue;
            File.remove(m.hdrfile.toChars());
        }
    }

    fatal();
}

/**
 * Converts a chain of identifiers to the filename of the module
 *
 * When there are no packages and no `global.params.modFileAliasStrings`
 * entries, the identifier's own string is returned unchanged.
 *
 * Params:
 *  packages = the names of the "parent" packages
 *  ident = the name of the child package or module
 *
 * Returns:
 *  the filename of the child package or module
 */
private const(char)[] getFilename(Identifier[] packages, Identifier ident) nothrow
{
    const(char)[] filename = ident.toString();

    OutBuffer buf;      // path being built
    OutBuffer dotmods;  // dotted name accumulated so far, matched against the aliases
    auto modAliases = &global.params.modFileAliasStrings;

    if (packages.length == 0 && modAliases.length == 0)
        return filename;

    void checkModFileAlias(const(char)[] p)
    {
        /* Check and replace the contents of buf[] with
        * an alias string from global.params.modFileAliasStrings[]
        */
        dotmods.writestring(p);
        foreach_reverse (const m; *modAliases)
        {
            // each entry has the form `name=path`; q points at the '='
            const q = strchr(m, '=');
            assert(q);
            if (dotmods.length == q - m && memcmp(dotmods.peekChars(), m, q - m) == 0)
            {
                buf.setsize(0);
                auto rhs = q[1 .. strlen(q)];
                if (rhs.length > 0 && (rhs[$ - 1] == '/' || rhs[$ - 1] == '\\'))
                    rhs = rhs[0 .. $ - 1]; // remove trailing separator
                buf.writestring(rhs);
                break; // last matching entry in ms[] wins
            }
        }
        dotmods.writeByte('.');
    }

    foreach (pid; packages)
    {
        const p = pid.toString();
        buf.writestring(p);
        if (modAliases.length)
            checkModFileAlias(p);
        version (Windows)
            enum FileSeparator = '\\';
        else
            enum FileSeparator = '/';
        buf.writeByte(FileSeparator);
    }
    buf.writestring(filename);
    if (modAliases.length)
        checkModFileAlias(filename);
    // append a terminating 0, then slice it off again so that the returned
    // slice is followed in memory by a 0 byte
    buf.writeByte(0);
    filename = buf.extractSlice()[0 .. $ - 1];

    return filename;
}

/***********************************************************
 * A package symbol. `Module` derives from this class.
 */
extern (C++) class Package : ScopeDsymbol
{
    PKG isPkgMod = PKG.unknown;
    uint tag;        // auto incremented tag, used to mask package tree in scopes
    Module mod;     // !=null if isPkgMod == PKG.module_

    /// Constructs the package and assigns it the next value of a global counter as `tag`.
    final extern (D) this(const ref Loc loc, Identifier ident) nothrow
    {
        super(loc, ident);
        __gshared uint packageTag;
        this.tag = packageTag++;
    }

    override const(char)* kind() const nothrow
    {
        return "package";
    }

    /// Returns: true if `o` is this object, or a `Package` with an equal `ident`
    /// and the same `isModule()` result
    override bool equals(const RootObject o) const
    {
        // custom 'equals' for bug 17441. "package a" and "module a" are not equal
        if (this == o)
            return true;
        auto p = cast(Package)o;
        return p && isModule() == p.isModule() && ident.equals(p.ident);
    }

    /****************************************************
     * Input:
     *      packages[]      the pkg1.pkg2 of pkg1.pkg2.mod
     * Returns:
     *      the symbol table that mod should be inserted into
     * Output:
     *      *pparent        the rightmost package, i.e. pkg2, or NULL if no packages
     *      *ppkg           the leftmost package, i.e.
pkg1, or NULL if no packages
     */
    extern (D) static DsymbolTable resolve(Identifier[] packages, Dsymbol* pparent, Package* ppkg)
    {
        DsymbolTable dst = Module.modules;
        Dsymbol parent = null;
        //printf("Package::resolve()\n");
        if (ppkg)
            *ppkg = null;
        foreach (pid; packages)
        {
            Package pkg;
            Dsymbol p = dst.lookup(pid);
            if (!p)
            {
                // not seen before: create the package and give it a symbol table
                pkg = new Package(Loc.initial, pid);
                dst.insert(pkg);
                pkg.parent = parent;
                pkg.symtab = new DsymbolTable();
            }
            else
            {
                pkg = p.isPackage();
                assert(pkg);
                // It might already be a module, not a package, but that needs
                // to be checked at a higher level, where a nice error message
                // can be generated.
                // dot net needs modules and packages with same name
                // But we still need a symbol table for it
                if (!pkg.symtab)
                    pkg.symtab = new DsymbolTable();
            }
            parent = pkg;
            dst = pkg.symtab;
            // only the first package of the chain is stored here
            if (ppkg && !*ppkg)
                *ppkg = pkg;
            if (pkg.isModule())
            {
                // Return the module so that a nice error message can be generated
                if (ppkg)
                    *ppkg = cast(Package)p;
                break;
            }
        }

        if (pparent)
            *pparent = parent;
        return dst;
    }

    override final inout(Package) isPackage() inout
    {
        return this;
    }

    /**
     * Checks if pkg is a sub-package of this
     *
     * For example, if this qualifies to 'a1.a2' and pkg - to 'a1.a2.a3',
     * this function returns 'true'. If it is other way around or qualified
     * package paths conflict function returns 'false'.
     *
     * Params:
     *  pkg = possible subpackage
     *
     * Returns:
     *  see description
     */
    final bool isAncestorPackageOf(const Package pkg) const
    {
        if (this == pkg)
            return true;
        if (!pkg || !pkg.parent)
            return false;
        // walk up the parent chain of pkg
        return isAncestorPackageOf(pkg.parent.isPackage());
    }

    /**
     * Searches for `ident`, always with `SearchLocalsOnly` cleared.
     *
     * When this is not a module but `mod` is set, this package's own symbol
     * table is consulted first and `mod` is searched otherwise.
     */
    override Dsymbol search(const ref Loc loc, Identifier ident, int flags = SearchLocalsOnly)
    {
        //printf("%s Package.search('%s', flags = x%x)\n", toChars(), ident.toChars(), flags);
        flags &= ~SearchLocalsOnly;  // searching an import is always transitive
        if (!isModule() && mod)
        {
            // Prefer full package name.
            Dsymbol s = symtab ? symtab.lookup(ident) : null;
            if (s)
                return s;
            //printf("[%s] through pkdmod: %s\n", loc.toChars(), toChars());
            return mod.search(loc, ident, flags);
        }
        return ScopeDsymbol.search(loc, ident, flags);
    }

    override void accept(Visitor v)
    {
        v.visit(this);
    }

    /// Returns: `mod` if `isPkgMod == PKG.module_`, otherwise null
    final Module isPackageMod()
    {
        if (isPkgMod == PKG.module_)
        {
            return mod;
        }
        return null;
    }

    /**
     * Checks for the existence of a package.d to set isPkgMod appropriately
     * if isPkgMod == PKG.unknown
     */
    final void resolvePKGunknown()
    {
        if (isModule())
            return;
        if (isPkgMod != PKG.unknown)
            return;

        // collect the identifiers of the enclosing packages, innermost first,
        // then reverse them into outermost-first order
        Identifier[] packages;
        for (Dsymbol s = this.parent; s; s = s.parent)
            packages ~= s.ident;
        reverse(packages);

        if (Module.find(getFilename(packages, ident)))
            Module.load(Loc.initial, packages, this.ident);
        else
            isPkgMod = PKG.package_;
    }
}

/***********************************************************
 * A module symbol.
 */
extern (C++) final class Module : Package
{
    extern (C++) __gshared Module rootModule;
    extern (C++) __gshared DsymbolTable modules; // symbol table of all modules
    extern (C++) __gshared Modules amodules; // array of all modules
    extern (C++) __gshared Dsymbols deferred; // deferred Dsymbol's needing semantic() run on them
    extern (C++) __gshared Dsymbols deferred2; // deferred Dsymbol's needing semantic2() run on them
    extern (C++) __gshared Dsymbols deferred3; // deferred Dsymbol's needing semantic3() run on them

    /// Creates the global `modules` symbol table.
    static void _init()
    {
        modules = new DsymbolTable();
    }

    /**
     * Deinitializes the global state of the compiler.
     *
     * This can be used to restore the state set by `_init` to its original
     * state.
     */
    static void deinitialize()
    {
        modules = modules.init;
    }

    extern (C++) __gshared AggregateDeclaration moduleinfo;

    const(char)[] arg;           // original argument name
    ModuleDeclaration* md;      // if !=null, the contents of the ModuleDeclaration declaration
    const FileName srcfile;     // input source file
    const FileName objfile;     // output .obj file
    const FileName hdrfile;     // 'header' file
    FileName docfile;           // output documentation file
    const(ubyte)[] src;         /// Raw content of the file
    uint errors;                // if any errors in file
    uint numlines;              // number of lines in source file
    FileType filetype;          // source file type
    bool hasAlwaysInlines;      // contains references to functions that must be inlined
    bool isPackageFile;         // if it is a package.d
    Package pkg;                // if isPackageFile is true, the Package that contains this package.d
    Strings contentImportedFiles; // array of files whose content was imported
    int needmoduleinfo;
    private ThreeState selfimports;   // cached result of selfImports()
    private ThreeState rootimports;   // cached result of rootImports()
    Dsymbol[void*] tagSymTab;   /// ImportC: tag symbols that conflict with other symbols used as the index

    private OutBuffer defines;  // collect all the #define lines here


    /*************************************
     * Return true if module imports itself.
*/
    bool selfImports()
    {
        //printf("Module::selfImports() %s\n", toChars());
        // the answer is computed once and cached in `selfimports`
        if (selfimports == ThreeState.none)
        {
            // clear the `insearch` marks before and after the walk
            foreach (Module m; amodules)
                m.insearch = false;
            selfimports = imports(this) ? ThreeState.yes : ThreeState.no;
            foreach (Module m; amodules)
                m.insearch = false;
        }
        return selfimports == ThreeState.yes;
    }

    /*************************************
     * Return true if module imports root module.
     */
    bool rootImports()
    {
        //printf("Module::rootImports() %s\n", toChars());
        // the answer is computed once and cached in `rootimports`
        if (rootimports == ThreeState.none)
        {
            foreach (Module m; amodules)
                m.insearch = false;
            rootimports = ThreeState.no;
            foreach (Module m; amodules)
            {
                // stop at the first root module that this module imports
                if (m.isRoot() && imports(m))
                {
                    rootimports = ThreeState.yes;
                    break;
                }
            }
            foreach (Module m; amodules)
                m.insearch = false;
        }
        return rootimports == ThreeState.yes;
    }

    private Identifier searchCacheIdent;
    private Dsymbol searchCacheSymbol;  // cached value of search
    private int searchCacheFlags;       // cached flags
    private bool insearch;              // set while a search of this module is in progress

    /**
     * A root module is one that will be compiled all the way to
     * object code.  This field holds the root module that caused
     * this module to be loaded.  If this module is a root module,
     * then it will be set to `this`.  This is used to determine
     * ownership of template instantiation.
*/
    Module importedFrom;

    Dsymbols* decldefs;         // top level declarations for this Module

    Modules aimports;           // all imported modules

    uint debuglevel;            // debug level
    Identifiers* debugids;      // debug identifiers
    Identifiers* debugidsNot;   // forward referenced debug identifiers

    uint versionlevel;          // version level
    Identifiers* versionids;    // version identifiers
    Identifiers* versionidsNot; // forward referenced version identifiers

    MacroTable macrotable;      // document comment macros
    Escape* _escapetable;       // document comment escapes

    size_t nameoffset;          // offset of module name from start of ModuleInfo
    size_t namelen;             // length of module name in characters

    /**
     * Constructs a module for the given file name.
     *
     * Sets `arg`, `srcfile` and `objfile`, plus `docfile` / `hdrfile` when
     * requested. Reports an error and calls `fatal()` when the source file
     * name does not carry one of the accepted extensions.
     *
     * Params:
     *  loc = location used for diagnostics
     *  filename = file name as given; stored in `arg`
     *  ident = module identifier
     *  doDocComment = non-zero to set `docfile`
     *  doHdrGen = non-zero to set `hdrfile`
     */
    extern (D) this(const ref Loc loc, const(char)[] filename, Identifier ident, int doDocComment, int doHdrGen)
    {
        super(loc, ident);
        const(char)[] srcfilename;
        //printf("Module::Module(filename = '%.*s', ident = '%s')\n", cast(int)filename.length, filename.ptr, ident.toChars());
        this.arg = filename;
        srcfilename = FileName.defaultExt(filename, mars_ext);
        // with -run on targets that allow it, an extension-less file that
        // exists is used as-is when the name with the default extension does not exist
        if (target.run_noext && global.params.run &&
            !FileName.ext(filename) &&
            FileName.exists(srcfilename) == 0 &&
            FileName.exists(filename) == 1)
        {
            FileName.free(srcfilename.ptr);
            srcfilename = FileName.removeExt(filename); // just does a mem.strdup(filename)
        }
        else if (!FileName.equalsExt(srcfilename, mars_ext) &&
                 !FileName.equalsExt(srcfilename, hdr_ext) &&
                 !FileName.equalsExt(srcfilename, c_ext) &&
                 !FileName.equalsExt(srcfilename, i_ext) &&
                 !FileName.equalsExt(srcfilename, dd_ext))
        {

            error(loc, "%s `%s` source file name '%.*s' must have .%.*s extension",
                  kind, toPrettyChars,
                  cast(int)srcfilename.length, srcfilename.ptr,
                  cast(int)mars_ext.length, mars_ext.ptr);
            fatal();
        }

        srcfile = FileName(srcfilename);
        objfile = setOutfilename(global.params.objname, global.params.objdir, filename, target.obj_ext);
        if (doDocComment)
            setDocfile();
        if (doHdrGen)
            hdrfile = setOutfilename(global.params.dihdr.name, global.params.dihdr.dir, arg, hdr_ext);
    }

    /// ditto, using `Loc.initial` as the location
    extern (D) this(const(char)[] filename, Identifier ident, int doDocComment, int doHdrGen)
    {
        this(Loc.initial, filename, ident, doDocComment, doHdrGen);
    }

    /// Creates a new `Module` at `Loc.initial`; the arguments are forwarded to the constructor.
    static Module create(const(char)* filename, Identifier ident, int doDocComment, int doHdrGen)
    {
        return create(filename.toDString, ident, doDocComment, doHdrGen);
    }

    /// ditto
    extern (D) static Module create(const(char)[] filename, Identifier ident, int doDocComment, int doHdrGen)
    {
        return new Module(Loc.initial, filename, ident, doDocComment, doHdrGen);
    }

    /// Looks `filename` up through `global.fileManager.lookForSourceFile`,
    /// passing `global.path` as the search path when it is set.
    static const(char)* find(const(char)* filename)
    {
        return find(filename.toDString).ptr;
    }

    /// ditto
    extern (D) static const(char)[] find(const(char)[] filename)
    {
        return global.fileManager.lookForSourceFile(filename, global.path ? (*global.path)[] : null);
    }

    /**
     * Creates, reads and parses the module named by `packages` and `ident`.
     *
     * Params:
     *  loc = location of the request, used for diagnostics
     *  packages = the enclosing package names; may be null / empty
     *  ident = the module name
     *
     * Returns:
     *  the result of `parse()`, or null when reading or parsing failed
     */
    extern (C++) static Module load(const ref Loc loc, Identifiers* packages, Identifier ident)
    {
        return load(loc, packages ? (*packages)[] : null, ident);
    }

    /// ditto
    extern (D) static Module load(const ref Loc loc, Identifier[] packages, Identifier ident)
    {
        //printf("Module::load(ident = '%s')\n", ident.toChars());
        // Build module filename by turning:
        //  foo.bar.baz
        // into:
        //  foo\bar\baz
        const(char)[] filename = getFilename(packages, ident);
        // Look for the source file
        if (const result = find(filename))
            filename = result; // leaks

        auto m = new Module(loc, filename, ident, 0, 0);

        if (!m.read(loc))
            return null;
        if (global.params.v.verbose)
        {
            // print the fully qualified name followed by the source file
            OutBuffer buf;
            foreach (pid; packages)
            {
                buf.writestring(pid.toString());
                buf.writeByte('.');
            }
            buf.printf("%s\t(%s)", ident.toChars(), m.srcfile.toChars());
            message("import %s", buf.peekChars());
        }
        // parse() may return a different Module object than m
        if((m = m.parse()) is null) return null;

        return m;
    }

    override const(char)* kind() const
    {
        return "module";
    }

    /*********************************************
     * Combines things into output file name for .html and .di files.
* Input:
     *      name    Command line name given for the file, NULL if none
     *      dir     Command line directory given for the file, NULL if none
     *      arg     Name of the source file
     *      ext     File name extension to use if 'name' is NULL
     *      global.params.preservePaths     get output path from arg
     *      srcfile Input file - output file name must not match input file
     * Returns:
     *      the output file name; an error is reported and `fatal()` is called
     *      when it equals `srcfile`
     */
    extern(D) FileName setOutfilename(const(char)[] name, const(char)[] dir, const(char)[] arg, const(char)[] ext)
    {
        const(char)[] docfilename;
        if (name)
        {
            docfilename = name;
        }
        else
        {
            const(char)[] argdoc;
            OutBuffer buf;
            // for input named "__stdin.d" the process id is put into the name
            if (arg == "__stdin.d")
            {
                version (Posix)
                    import core.sys.posix.unistd : getpid;
                else version (Windows)
                    import core.sys.windows.winbase : getpid = GetCurrentProcessId;
                buf.printf("__stdin_%d.d", getpid());
                arg = buf[];
            }
            if (global.params.preservePaths)
                argdoc = arg;
            else
                argdoc = FileName.name(arg);
            // If argdoc doesn't have an absolute path, make it relative to dir
            if (!FileName.absolute(argdoc))
            {
                //FileName::ensurePathExists(dir);
                argdoc = FileName.combine(dir, argdoc);
            }
            docfilename = FileName.forceExt(argdoc, ext);
        }
        if (FileName.equals(docfilename, srcfile.toString()))
        {
            error(loc, "%s `%s` source file and output file have same name '%s'",
                kind, toPrettyChars, srcfile.toChars());
            fatal();
        }
        return FileName(docfilename);
    }

    /// Sets `docfile` from `global.params.ddoc.name` / `.dir`, `arg` and `doc_ext`.
    extern (D) void setDocfile()
    {
        docfile = setOutfilename(global.params.ddoc.name, global.params.ddoc.dir, arg, doc_ext);
    }

    /**
     * Trigger the relevant semantic error when a file cannot be read
     *
     * We special case `object.d` as a failure is likely to be a rare
     * but difficult to diagnose case for the user. Packages also require
     * special handling to avoid exposing the compiler's internals.
*
     * Params:
     *  loc = The location at which the file read originated (e.g. import)
     */
    private void onFileReadError(const ref Loc loc)
    {
        if (FileName.equals(srcfile.toString(), "object.d"))
        {
            .error(loc, "cannot find source code for runtime library file 'object.d'");
            version (IN_LLVM)
            {
                errorSupplemental(loc, "ldc2 might not be correctly installed.");
                errorSupplemental(loc, "Please check your ldc2.conf configuration file.");
                errorSupplemental(loc, "Installation instructions can be found at http://wiki.dlang.org/LDC.");
            }
            version (MARS)
            {
                errorSupplemental(loc, "dmd might not be correctly installed. Run 'dmd -man' for installation instructions.");
                const dmdConfFile = global.inifilename.length ? FileName.canonicalName(global.inifilename) : "not found";
                errorSupplemental(loc, "config file: %.*s", cast(int)dmdConfFile.length, dmdConfFile.ptr);
            }
        }
        else if (FileName.ext(this.arg) || !loc.isValid())
        {
            // Modules whose original argument name has an extension, or do not
            // have a valid location come from the command-line.
            // Error that their file cannot be found and return early.
            .error(loc, "cannot find input file `%s`", srcfile.toChars());
        }
        else
        {
            // if module is not named 'package' but we're trying to read 'package.d', we're looking for a package module
            bool isPackageMod = (strcmp(toChars(), "package") != 0) && isPackageFileName(srcfile);
            if (isPackageMod)
                .error(loc, "importing package '%s' requires a 'package.d' file which cannot be found in '%s'", toChars(), srcfile.toChars());
            else
            {
                .error(loc, "unable to read module `%s`", toChars());
                // name both candidate files: `<name>.d` and `<name>/package.d`
                const pkgfile = FileName.combine(FileName.removeExt(srcfile.toString()), package_d);
                .errorSupplemental(loc, "Expected '%s' or '%s' in one of the following import paths:",
                    srcfile.toChars(), pkgfile.ptr);
            }
        }
        // the import paths are listed and compilation is aborted only when
        // errors are not gagged
        if (!global.gag)
        {
            /* Print path
             */
            if (global.path)
            {
                foreach (i, p; *global.path)
                    fprintf(stderr, "import path[%llu] = %s\n", cast(ulong)i, p);
            }
            else
            {
                fprintf(stderr, "Specify path to file '%s' with -I switch\n", srcfile.toChars());
            }

            removeHdrFilesAndFail(global.params, Module.amodules);
        }
    }

    /**
     * Reads the file from `srcfile` and loads the source buffer.
     *
     * If makefile module dependency is requested, we add this module
     * to the list of dependencies from here.
*
     * Params:
     *  loc = the location
     *
     * Returns: `true` if successful
     */
    bool read(const ref Loc loc)
    {
        if (this.src)
            return true; // already read

        //printf("Module::read('%s') file '%s'\n", toChars(), srcfile.toChars());

        /* Preprocess the file if it's a .c file
         */
        FileName filename = srcfile;
        bool ifile = false;             // did we generate a .i file
        scope (exit)
        {
            if (ifile)
                File.remove(filename.toChars());        // remove generated file
        }

        if (global.preprocess &&
            FileName.equalsExt(srcfile.toString(), c_ext) &&
            FileName.exists(srcfile.toString()))
        {
            filename = global.preprocess(srcfile, loc, ifile, &defines);  // run C preprocessor
        }

        if (auto result = global.fileManager.lookup(filename))
        {
            this.src = result;
            // the dependency list records srcfile, not the preprocessed file
            if (global.params.makeDeps.doOutput)
                global.params.makeDeps.files.push(srcfile.toChars());
            return true;
        }

        this.onFileReadError(loc);
        return false;
    }

    /// syntactic parse
    Module parse()
    {
        return parseModule!ASTCodegen();
    }

    /// ditto
    extern (D) Module parseModule(AST)()
    {
        const(char)* srcname = srcfile.toChars();
        //printf("Module::parse(srcname = '%s')\n", srcname);
        isPackageFile = isPackageFileName(srcfile);
        const(char)[] buf = processSource(src, this);
        // an error happened on UTF conversion
        if (buf is null) return null;

        /* If it starts with the string "Ddoc", then it's a documentation
         * source file.
         */
        if (buf.length>= 4 && buf[0..4] == "Ddoc")
        {
            comment = buf.ptr + 4;
            filetype = FileType.ddoc;
            if (!docfile)
                setDocfile();
            return this;
        }
        /* If it has the extension ".dd", it is also a documentation
         * source file. Documentation source files may begin with "Ddoc"
         * but do not have to if they have the .dd extension.
         * https://issues.dlang.org/show_bug.cgi?id=15465
         */
        if (FileName.equalsExt(arg, dd_ext))
        {
            comment = buf.ptr; // the optional Ddoc, if present, is handled above.
            filetype = FileType.ddoc;
            if (!docfile)
                setDocfile();
            return this;
        }
        /* If it has the extension ".di", it is a "header" file.
         */
        if (FileName.equalsExt(arg, hdr_ext))
            filetype = FileType.dhdr;

        /// Promote `this` to a root module if requested via `-i`
        void checkCompiledImport()
        {
            if (!this.isRoot() && Compiler.onImport(this))
                this.importedFrom = this;
        }

        DsymbolTable dst;       // symbol table this module is inserted into
        Package ppack = null;   // leftmost package from the module declaration, if any

        /* If it has the extension ".c", it is a "C" file.
         * If it has the extension ".i", it is a preprocessed "C" file.
         */
        if (FileName.equalsExt(arg, c_ext) || FileName.equalsExt(arg, i_ext))
        {
            filetype = FileType.c;

            // masm is set only while the CParser is being constructed
            global.compileEnv.masm = target.os == Target.OS.Windows && !target.omfobj; // Microsoft inline assembler format
            scope p = new CParser!AST(this, buf, cast(bool) docfile, global.errorSink, target.c, &defines, &global.compileEnv);
            global.compileEnv.masm = false;
            p.nextToken();
            checkCompiledImport();
            members = p.parseModule();
            assert(!p.md);                 // C doesn't have module declarations
            numlines = p.scanloc.linnum;
        }
        else
        {
            const bool doUnittests = global.params.useUnitTests || global.params.ddoc.doOutput || global.params.dihdr.doOutput;
            scope p = new Parser!AST(this, buf, cast(bool) docfile, global.errorSink, &global.compileEnv, doUnittests);
            p.transitionIn = global.params.v.vin;
            p.nextToken();
            p.parseModuleDeclaration();
            md = p.md;

            if (md)
            {
                /* A ModuleDeclaration, md, was provided.
                 * The ModuleDeclaration sets the packages this module appears in, and
                 * the name of this module.
                 */
                this.ident = md.id;
                dst = Package.resolve(md.packages, &this.parent, &ppack);
            }

            // Done after parsing the module header because `module x.y.z` may override the file name
            checkCompiledImport();

            members = p.parseModuleContent();
            numlines = p.scanloc.linnum;
        }

        /* The symbol table into which the module is to be inserted.
         */

        if (md)
        {
            // Mark the package path as accessible from the current module
            // https://issues.dlang.org/show_bug.cgi?id=21661
            // Code taken from Import.addPackageAccess()
            if (md.packages.length > 0)
            {
                // module a.b.c.d;
                auto p = ppack; // a
                addAccessiblePackage(p, Visibility(Visibility.Kind.private_));
                foreach (id; md.packages[1 .. $]) // [b, c]
                {
                    p = cast(Package) p.symtab.lookup(id);
                    if (p is null)
                        break;
                    addAccessiblePackage(p, Visibility(Visibility.Kind.private_));
                }
            }
            assert(dst);
            // a leading "package" that is really a module, and not a package.d, is a conflict
            Module m = ppack ? ppack.isModule() : null;
            if (m && !isPackageFileName(m.srcfile))
            {
                .error(md.loc, "package name '%s' conflicts with usage as a module name in file %s", ppack.toPrettyChars(), m.srcfile.toChars());
            }
        }
        else
        {
            /* The name of the module is set to the source file name.
             * There are no packages.
             */
            dst = modules; // and so this module goes into global module symbol table
            /* Check to see if module name is a valid identifier
             */
            if (!Identifier.isValidIdentifier(this.ident.toChars()))
                error(loc, "%s `%s` has non-identifier characters in filename, use module declaration instead", kind, toPrettyChars);
        }
        // Insert module into the symbol table
        Dsymbol s = this;
        if (isPackageFile)
        {
            /* If the source tree is as follows:
             *     pkg/
             *     +- package.d
             *     +- common.d
             * the 'pkg' will be incorporated to the internal package tree in two ways:
             *     import pkg;
             * and:
             *     import pkg.common;
             *
             * If both are used in one compilation, 'pkg' as a module (== pkg/package.d)
             * and a package name 'pkg' will conflict each other.
             *
             * To avoid the conflict:
             * 1. If preceding package name insertion had occurred by Package::resolve,
             *    reuse the previous wrapping 'Package' if it exists
             * 2. Otherwise, 'package.d' wrapped by 'Package' is inserted to the internal tree in here.
             *
             * Then change Package::isPkgMod to PKG.module_ and set Package::mod.
             *
             * Note that the 'wrapping Package' is the Package that contains package.d and other submodules,
             * the one inserted to the symbol table.
             */
            auto ps = dst.lookup(ident);
            Package p = ps ? ps.isPackage() : null;
            if (p is null)
            {
                p = new Package(Loc.initial, ident);
                p.tag = this.tag; // reuse the same package tag
                p.symtab = new DsymbolTable();
            }
            this.tag = p.tag; // reuse the 'older' package tag
            this.pkg = p;
            p.parent = this.parent;
            p.isPkgMod = PKG.module_;
            p.mod = this;
            s = p;
        }
        if (!dst.insert(s))
        {
            /* It conflicts with a name that is already in the symbol table.
             * Figure out what went wrong, and issue error message.
             */
            Dsymbol prev = dst.lookup(ident);
            assert(prev);
            if (Module mprev = prev.isModule())
            {
                if (!FileName.equals(srcname, mprev.srcfile.toChars()))
                    error(loc, "%s `%s` from file %s conflicts with another module %s from file %s", kind, toPrettyChars, srcname, mprev.toChars(), mprev.srcfile.toChars());
                else if (isRoot() && mprev.isRoot())
                    error(loc, "%s `%s` from file %s is specified twice on the command line", kind, toPrettyChars, srcname);
                else
                    error(loc, "%s `%s` from file %s must be imported with 'import %s;'", kind, toPrettyChars, srcname, toPrettyChars());
                // https://issues.dlang.org/show_bug.cgi?id=14446
                // Return previously parsed module to avoid AST duplication ICE.
                return mprev;
            }
            else if (Package pkg = prev.isPackage())
            {
                // 'package.d' loaded after a previous 'Package' insertion
                if (isPackageFile)
                    amodules.push(this); // Add to global array of all modules
                else
                    error(md ? md.loc : loc, "%s `%s` from file %s conflicts with package name %s", kind, toPrettyChars, srcname, pkg.toChars());
            }
            else
                assert(global.errors);
        }
        else
        {
            // Add to global array of all modules
            amodules.push(this);
        }
        Compiler.onParseModule(this);
        return this;
    }

    /// Creates the module's root scope, adds its members to the symbol table
    /// and runs `importAll` on each member. Does nothing when `_scope` is already set.
    override void importAll(Scope* prevsc)
    {
        //printf("+Module::importAll(this = %p, '%s'): parent = %p\n", this, toChars(), parent);
        if (_scope)
            return; // already done
        if (filetype == FileType.ddoc)
        {
            error(loc, "%s `%s` is a Ddoc file, cannot import it", kind, toPrettyChars);
            return;
        }

        /* Note that modules get their own scope, from scratch.
         * This is so regardless of where in the syntax a module
         * gets imported, it is unaffected by context.
         * Ignore prevsc.
*/
        Scope* sc = Scope.createGlobal(this, global.errorSink); // create root scope

        if (md && md.msg)
            md.msg = semanticString(sc, md.msg, "deprecation message");

        // Add import of "object", even for the "object" module.
        // If it isn't there, some compiler rewrites, like
        //    classinst == classinst -> .object.opEquals(classinst, classinst)
        // would fail inside object.d.
        if (filetype != FileType.c &&
            (members.length == 0 ||
             (*members)[0].ident != Id.object ||
             (*members)[0].isImport() is null))
        {
            // the import is inserted as the first member
            auto im = new Import(Loc.initial, null, Id.object, null, 0);
            members.shift(im);
        }
        if (!symtab)
        {
            // Add all symbols into module's symbol table
            symtab = new DsymbolTable();
            for (size_t i = 0; i < members.length; i++)
            {
                Dsymbol s = (*members)[i];
                s.addMember(sc, sc.scopesym);
            }
        }
        // anything else should be run after addMember, so version/debug symbols are defined
        /* Set scope for the symbols so that if we forward reference
         * a symbol, it can possibly be resolved on the spot.
         * If this works out well, it can be extended to all modules
         * before any semantic() on any of them.
         */
        setScope(sc); // remember module scope for semantic
        for (size_t i = 0; i < members.length; i++)
        {
            Dsymbol s = (*members)[i];
            s.setScope(sc);
        }
        // every member has its scope set before any member's importAll runs
        for (size_t i = 0; i < members.length; i++)
        {
            Dsymbol s = (*members)[i];
            s.importAll(sc);
        }
        sc = sc.pop();
        sc.pop(); // 2 pops because Scope.createGlobal() created 2
    }

    /**********************************
     * Determine if we need to generate an instance of ModuleInfo
     * for this Module.
*/
    int needModuleInfo()
    {
        //printf("needModuleInfo() %s, %d, %d\n", toChars(), needmoduleinfo, global.params.cov);
        return needmoduleinfo || global.params.cov;
    }

    /*******************************************
     * Print deprecation warning if we're deprecated, when
     * this module is imported from scope sc.
     *
     * Params:
     *  sc = the scope into which we are imported
     *  loc = the location of the import statement
     */
    void checkImportDeprecation(const ref Loc loc, Scope* sc)
    {
        if (md && md.isdeprecated && !sc.isDeprecated)
        {
            Expression msg = md.msg;
            if (StringExp se = msg ? msg.toStringExp() : null)
            {
                const slice = se.peekString();
                if (slice.length)
                {
                    deprecation(loc, "%s `%s` is deprecated - %.*s", kind, toPrettyChars, cast(int)slice.length, slice.ptr);
                    return;
                }
            }
            // no message, or an empty one: use the short form
            deprecation(loc, "%s `%s` is deprecated", kind, toPrettyChars);
        }
    }

    /// Searches the module for `ident`. Returns null while a search of this
    /// module is already in progress; the last result is cached.
    override Dsymbol search(const ref Loc loc, Identifier ident, int flags = SearchLocalsOnly)
    {
        /* Since modules can be circularly referenced,
         * need to stop infinite recursive searches.
         * This is done with the cache.
         */
        //printf("%s Module.search('%s', flags = x%x) insearch = %d\n", toChars(), ident.toChars(), flags, insearch);
        if (insearch)
            return null;

        /* Qualified module searches always search their imports,
         * even if SearchLocalsOnly
         */
        if (!(flags & SearchUnqualifiedModule))
            flags &= ~(SearchUnqualifiedModule | SearchLocalsOnly);

        if (searchCacheIdent == ident && searchCacheFlags == flags)
        {
            //printf("%s Module::search('%s', flags = %d) insearch = %d searchCacheSymbol = %s\n",
            //        toChars(), ident.toChars(), flags, insearch, searchCacheSymbol ? searchCacheSymbol.toChars() : "null");
            return searchCacheSymbol;
        }

        // remember the error count so that errors raised by the search can be detected
        uint errors = global.errors;

        insearch = true;
        Dsymbol s = ScopeDsymbol.search(loc, ident, flags);
        insearch = false;

        if (errors == global.errors)
        {
            // https://issues.dlang.org/show_bug.cgi?id=10752
            // Can cache the result only when it does not cause
            // access error so the side-effect should be reproduced in later search.
            searchCacheIdent = ident;
            searchCacheSymbol = s;
            searchCacheFlags = flags;
        }
        return s;
    }

    /// Forwards to the base implementation, returning false while this module
    /// is already being searched. With `IgnorePrivateImports` in `flags`, the
    /// visibility passed on is `public_`.
    override bool isPackageAccessible(Package p, Visibility visibility, int flags = 0)
    {
        if (insearch) // don't follow import cycles
            return false;
        insearch = true;
        scope (exit)
            insearch = false;
        if (flags & IgnorePrivateImports)
            visibility = Visibility(Visibility.Kind.public_); // only consider public imports
        return super.isPackageAccessible(p, visibility);
    }

    override Dsymbol symtabInsert(Dsymbol s)
    {
        searchCacheIdent = null; // symbol is inserted, so invalidate cache
        return Package.symtabInsert(s);
    }

    /// Removes `objfile` when `global.params.obj` is set, and `docfile` when it is set.
    extern (D) void deleteObjFile()
    {
        if (global.params.obj)
            File.remove(objfile.toChars());
        if (docfile)
            File.remove(docfile.toChars());
    }

    /*******************************************
     * Can't run semantic on s now, try again later.
*/
    extern (D) static void addDeferredSemantic(Dsymbol s)
    {
        //printf("Module::addDeferredSemantic('%s')\n", s.toChars());
        if (!deferred.contains(s))
            deferred.push(s);
    }

    /*******************************************
     * Queue `s` for a later semantic2 pass, unless it is already queued.
     */
    extern (D) static void addDeferredSemantic2(Dsymbol s)
    {
        //printf("Module::addDeferredSemantic2('%s')\n", s.toChars());
        if (!deferred2.contains(s))
            deferred2.push(s);
    }

    /*******************************************
     * Queue `s` for a later semantic3 pass, unless it is already queued.
     */
    extern (D) static void addDeferredSemantic3(Dsymbol s)
    {
        //printf("Module::addDeferredSemantic3('%s')\n", s.toChars());
        // Deduplicate against the semantic3 queue itself. Checking the
        // semantic1 queue (`deferred`) here would allow duplicates in
        // `deferred3` and would drop symbols that happen to be pending
        // in `deferred`.
        if (!deferred3.contains(s))
            deferred3.push(s);
    }

    /******************************************
     * Run semantic() on deferred symbols.
     *
     * Each pass takes a snapshot of the `deferred` queue, empties the
     * queue, and runs `dsymbolSemantic(null)` on every snapshot entry.
     * Passes repeat until a pass leaves the queue with the same length
     * it started with. Calls made while a run is already in progress
     * return immediately.
     */
    static void runDeferredSemantic()
    {
        __gshared int nested;
        if (nested)
            return;
        //if (deferred.length) printf("+Module::runDeferredSemantic(), len = %ld\n", deferred.length);
        nested++;

        size_t len;
        do
        {
            len = deferred.length;
            if (!len)
                break;

            Dsymbol* todo;
            Dsymbol* todoalloc = null;
            Dsymbol tmp;
            if (len == 1)
            {
                // A single entry fits in a local; no heap allocation needed.
                todo = &tmp;
            }
            else
            {
                todo = cast(Dsymbol*)Mem.check(malloc(len * Dsymbol.sizeof));
                todoalloc = todo;
            }
            // Work on a copy: running semantic may append to `deferred` again.
            memcpy(todo, deferred.tdata(), len * Dsymbol.sizeof);
            deferred.setDim(0);

            foreach (i; 0..len)
            {
                Dsymbol s = todo[i];
                s.dsymbolSemantic(null);
                //printf("deferred: %s, parent = %s\n", s.toChars(), s.parent.toChars());
            }
            //printf("\tdeferred.length = %ld, len = %ld\n", deferred.length, len);
            if (todoalloc)
                free(todoalloc);
        }
        while (deferred.length != len); // while making progress
        nested--;
        //printf("-Module::runDeferredSemantic(), len = %ld\n", deferred.length);
    }

    /******************************************
     * Run the deferred semantic pass, then semantic2() on every symbol in
     * the `deferred2` queue. Stops early once any error has been reported;
     * the queue is emptied in either case.
     */
    static void runDeferredSemantic2()
    {
        Module.runDeferredSemantic();

        Dsymbols* a = &Module.deferred2;
        // Index loop: the length is re-read on every iteration.
        for (size_t i = 0; i < a.length; i++)
        {
            Dsymbol s = (*a)[i];
            //printf("[%d] %s semantic2a\n", i, s.toPrettyChars());
            s.semantic2(null);

            if (global.errors)
                break;
        }
        a.setDim(0);
    }

    /******************************************
     * Run the deferred semantic and semantic2 passes, then semantic3() on
     * every symbol in the `deferred3` queue. Stops early once any error has
     * been reported; the queue is emptied in either case.
     */
    static void runDeferredSemantic3()
    {
        Module.runDeferredSemantic2();

        Dsymbols* a = &Module.deferred3;
        // Index loop: the length is re-read on every iteration.
        for (size_t i = 0; i < a.length; i++)
        {
            Dsymbol s = (*a)[i];
            //printf("[%d] %s semantic3a\n", i, s.toPrettyChars());
            s.semantic3(null);

            if (global.errors)
                break;
        }
        a.setDim(0);
    }

    /******************************************
     * Invalidate the cached search result of every module in `amodules`.
     */
    extern (D) static void clearCache() nothrow
    {
        foreach (Module m; amodules)
            m.searchCacheIdent = null;
    }

    /************************************
     * Recursively look at every module this module imports,
     * return true if it imports m.
     * Can be used to detect circular imports.
     *
     * The `insearch` flag of each visited module is set so that it is not
     * visited twice.
     * NOTE(review): the flags set here are not reset in this function;
     * presumably the caller clears them before the next use - confirm.
     */
    int imports(Module m) nothrow
    {
        //printf("%s Module::imports(%s)\n", toChars(), m.toChars());
        version (none)
        {
            foreach (i, Module mi; aimports)
                printf("\t[%d] %s\n", cast(int) i, mi.toChars());
        }
        foreach (Module mi; aimports)
        {
            if (mi == m)
                return true;
            if (!mi.insearch)
            {
                mi.insearch = true;
                int r = mi.imports(m);
                if (r)
                    return r;
            }
        }
        return false;
    }

    /// Returns: Whether this module is recorded as imported from itself (`importedFrom == this`)
    bool isRoot() nothrow
    {
        return this.importedFrom == this;
    }

    /// Returns: Whether this module is in the `core` package and has name `ident`
    bool isCoreModule(Identifier ident) nothrow
    {
        return this.ident == ident && parent && parent.ident == Id.core && !parent.parent;
    }

    // Back end
    int doppelganger; // sub-module
    Symbol* cov; // private uint[] __coverage;
    uint[] covb; // bit array of valid code line numbers
    Symbol* sictor; // module order independent constructor
    Symbol* sctor; // module constructor
    Symbol* sdtor; // module destructor
    Symbol* ssharedctor; // module shared constructor
    Symbol* sshareddtor; // module shared destructor
    Symbol* stest; // module unit test
    Symbol* sfilename; // symbol for filename

    uint[uint] ctfe_cov; /// coverage information from ctfe execution_count[line]

    /// Returns: this object; identifies the symbol as a `Module`
    override inout(Module) isModule() inout nothrow
    {
        return this;
    }

    /// Dispatch to the visitor's overload for `Module`
    override void accept(Visitor v)
    {
        v.visit(this);
    }

    /***********************************************
     * Writes this module's fully-qualified name to buf
     *
     * The module name is appended first; each enclosing package, walking
     * outwards, is then prepended together with a separating dot.
     * Params:
     *    buf = The buffer to write to
     */
    void fullyQualifiedName(ref OutBuffer buf) nothrow
    {
        buf.writestring(ident.toString());

        for (auto package_ = parent; package_ !is null; package_ = package_.parent)
        {
            buf.prependstring(".");
            buf.prependstring(package_.ident.toChars());
        }
    }

    /** Lazily initializes and returns the escape table.
    Turns out it eats a lot of memory.
*/
    extern(D) Escape* escapetable() nothrow
    {
        // Allocate on first use only.
        if (_escapetable is null)
            _escapetable = new Escape();
        return _escapetable;
    }

    /****************************
     * Load `core.stdc.config`, caching the module after the first
     * successful load.
     * Returns:
     *  Module of core.stdc.config, null if couldn't find it
     */
    extern (D) static Module loadCoreStdcConfig()
    {
        __gshared Module cached;
        auto path = new Identifier[2];
        path[0] = Id.core;
        path[1] = Id.stdc;
        return loadModuleFromLibrary(cached, path, Id.config);
    }

    /****************************
     * Load `core.atomic`, caching the module after the first
     * successful load.
     * Returns:
     *  Module of core.atomic, null if couldn't find it
     */
    extern (D) static Module loadCoreAtomic()
    {
        __gshared Module cached;
        auto path = new Identifier[1];
        path[0] = Id.core;
        return loadModuleFromLibrary(cached, path, Id.atomic);
    }

    /****************************
     * Load `std.math`, caching the module after the first
     * successful load.
     * Returns:
     *  Module of std.math, null if couldn't find it
     */
    extern (D) static Module loadStdMath()
    {
        __gshared Module cached;
        auto path = new Identifier[1];
        path[0] = Id.std;
        return loadModuleFromLibrary(cached, path, Id.math);
    }

    /**********************************
     * Load a Module from the library.
     *
     * A non-null `mod` is returned as is. Otherwise an import of the
     * requested module is created and loaded with errors gagged; on
     * success `importAll` and `dsymbolSemantic` are run on it, and the
     * outcome is stored back into `mod`.
     * Params:
     *  mod = cached return value of this call
     *  pkgids = package identifiers
     *  modid = module id
     * Returns:
     *  Module loaded, null if cannot load it
     */
    extern (D) private static Module loadModuleFromLibrary(ref Module mod, Identifier[] pkgids, Identifier modid)
    {
        if (mod is null)
        {
            auto imp = new Import(Loc.initial, pkgids, modid, null, true);

            // Module.load will call fatal() if there's no module available.
            // Gag the error here, pushing the error handling to the caller.
            const gagged = global.startGagging();
            imp.load(null);
            if (Module loaded = imp.mod)
            {
                loaded.importAll(null);
                loaded.dsymbolSemantic(null);
            }
            global.endGagging(gagged);

            mod = imp.mod;
        }
        return mod;
    }
}

/***********************************************************
 * The contents of a `module` declaration: its location, the package
 * chain and module identifier, and the deprecation flag and message.
 */
extern (C++) struct ModuleDeclaration
{
    Loc loc;
    Identifier id;
    Identifier[] packages; // array of Identifier's representing packages
    bool isdeprecated; // if it is a deprecated module
    Expression msg;

    extern (D) this(const ref Loc loc, Identifier[] packages, Identifier id, Expression msg, bool isdeprecated) @safe
    {
        this.loc = loc;
        this.id = id;
        this.packages = packages;
        this.isdeprecated = isdeprecated;
        this.msg = msg;
    }

    /// Returns: the dotted name `pkg1.pkg2.id` as a C string
    extern (C++) const(char)* toChars() const @safe
    {
        OutBuffer dotted;
        foreach (i; 0 .. packages.length)
        {
            dotted.writestring(packages[i].toString());
            dotted.writeByte('.');
        }
        dotted.writestring(id.toString());
        return dotted.extractChars();
    }

    /// Provide a human readable representation
    extern (D) const(char)[] toString() const
    {
        return toDString(this.toChars());
    }
}

/****************************************
 * Collect the local classes of a Module, suitable
 * for inclusion in ModuleInfo.
 *
 * Classes whose parent is a template mixin are skipped, as are
 * Objective-C classes. Results are appended to `aclasses`; nothing
 * is returned.
 * Params:
 *      mod = the Module
 *      aclasses = array to fill in
 */
extern (C++) void getLocalClasses(Module mod, ref ClassDeclarations aclasses)
{
    //printf("members.length = %d\n", mod.members.length);
    int collect(size_t n, Dsymbol sm)
    {
        auto cd = sm ? sm.isClassDeclaration() : null;
        if (!cd)
            return 0;

        // compatibility with previous algorithm
        const bool fromMixin = cd.parent && cd.parent.isTemplateMixin();
        if (!fromMixin && cd.classKind != ClassKind.objc)
            aclasses.push(cd);

        return 0; // always continue the iteration
    }

    ScopeDsymbol._foreach(null, mod.members, &collect);
}

/**
 * Process the content of a source file
 *
 * Works out the encoding of `src` from its byte order mark or, failing
 * that, from the position of zero bytes among the first four bytes, and
 * converts UTF-16 / UTF-32 input to UTF-8. When no conversion is needed
 * a slice of `src` is returned without extra allocation.
 *
 * Params:
 *   src = Content of the source file to process
 *   mod = Module matching `src`, used for error handling
 *
 * Returns:
 *   UTF-8 encoded variant of `src`, stripped of any BOM,
 *   or `null` if an error happened.
 */
private const(char)[] processSource (const(ubyte)[] src, Module mod)
{
    enum SourceEncoding { utf16, utf32}
    enum Endian { little, big}

    /*
     * Convert a buffer from UTF32 to UTF8
     * Params:
     *    Endian = is the buffer big/little endian
     *    buf = buffer of UTF32 data
     * Returns:
     *    input buffer reencoded as UTF8, with a terminating zero byte,
     *    or null after reporting an error
     */
    char[] UTF32ToUTF8(Endian endian)(const(char)[] buf)
    {
        static if (endian == Endian.little)
            alias readNext = Port.readlongLE;
        else
            alias readNext = Port.readlongBE;

        // The input must be a whole number of 4-byte units.
        if ((buf.length % 4) != 0)
        {
            .error(mod.loc, "%s `%s` odd length of UTF-32 char source %llu",
                mod.kind, mod.toPrettyChars, cast(ulong) buf.length);
            return null;
        }

        const (uint)[] eBuf = cast(const(uint)[])buf;
        const size_t count = eBuf.length;

        OutBuffer dbuf;
        dbuf.reserve(count);

        for (size_t i = 0; i < count; ++i)
        {
            const u = readNext(&eBuf[i]);

            // ASCII passes through as a single byte.
            if ((u & ~0x7F) == 0)
            {
                dbuf.writeByte(u);
                continue;
            }

            if (u > 0x10FFFF)
            {
                .error(mod.loc, "%s `%s` UTF-32 value %08x greater than 0x10FFFF", mod.kind, mod.toPrettyChars, u);
                return null;
            }
            dbuf.writeUTF8(u);
        }

        dbuf.writeByte(0); // terminating zero byte
        return dbuf.extractSlice();
    }

    /*
     * Convert a buffer from UTF16 to UTF8
     * Params:
     *    Endian = is the buffer big/little endian
     *    buf = buffer of UTF16 data
     * Returns:
     *    input buffer reencoded as UTF8, with a terminating zero byte,
     *    or null after reporting an error
     */
    char[] UTF16ToUTF8(Endian endian)(const(char)[] buf)
    {
        static if (endian == Endian.little)
            alias readNext = Port.readwordLE;
        else
            alias readNext = Port.readwordBE;

        // The input must be a whole number of 2-byte units.
        if ((buf.length % 2) != 0)
        {
            .error(mod.loc, "%s `%s` odd length of UTF-16 char source %llu", mod.kind, mod.toPrettyChars, cast(ulong) buf.length);
            return null;
        }

        const (ushort)[] eBuf = cast(const(ushort)[])buf;

        OutBuffer dbuf;
        dbuf.reserve(eBuf.length);

        // The index advances an extra step inside the body when a
        // surrogate pair is consumed.
        for (size_t i = 0; i < eBuf.length; ++i)
        {
            uint u = readNext(&eBuf[i]);

            // ASCII passes through as a single byte.
            if ((u & ~0x7F) == 0)
            {
                dbuf.writeByte(u);
                continue;
            }

            if (0xD800 <= u && u < 0xDC00)
            {
                // High surrogate: the low half must follow immediately.
                ++i;
                if (i >= eBuf.length)
                {
                    .error(mod.loc, "%s `%s` surrogate UTF-16 high value %04x at end of file", mod.kind, mod.toPrettyChars, u);
                    return null;
                }
                const u2 = readNext(&eBuf[i]);
                if (u2 < 0xDC00 || 0xE000 <= u2)
                {
                    .error(mod.loc, "%s `%s` surrogate UTF-16 low value %04x out of range", mod.kind, mod.toPrettyChars, u2);
                    return null;
                }
                // Combine both halves into one code point.
                u = ((u - 0xD7C0) << 10) | (u2 - 0xDC00);
            }
            else if (u >= 0xDC00 && u <= 0xDFFF)
            {
                .error(mod.loc, "%s `%s` unpaired surrogate UTF-16 value %04x", mod.kind, mod.toPrettyChars, u);
                return null;
            }
            else if (u == 0xFFFE || u == 0xFFFF)
            {
                .error(mod.loc, "%s `%s` illegal UTF-16 value %04x", mod.kind, mod.toPrettyChars, u);
                return null;
            }

            dbuf.writeUTF8(u);
        }

        dbuf.writeByte(0); // terminating zero byte
        return dbuf.extractSlice();
    }

    const(char)[] text = cast(const(char)[]) src;

    // Fewer than two bytes: assume the buffer comes from memory rather
    // than from disk, and treat it as UTF-8.
    if (text.length < 2)
        return text;

    const bool atLeast4 = text.length >= 4;

    /* Convert all non-UTF-8 formats to UTF-8.
     * BOM : https://www.unicode.org/faq/utf_bom.html
     *      00 00 FE FF  UTF-32BE, big-endian
     *      FF FE 00 00  UTF-32LE, little-endian
     *      FE FF        UTF-16BE, big-endian
     *      FF FE        UTF-16LE, little-endian
     *      EF BB BF     UTF-8
     */
    if (text[0] == 0xFF && text[1] == 0xFE)
    {
        // FF FE followed by 00 00 is UTF-32LE, otherwise UTF-16LE.
        return (atLeast4 && text[2] == 0 && text[3] == 0)
            ? UTF32ToUTF8!(Endian.little)(text[4 .. $])
            : UTF16ToUTF8!(Endian.little)(text[2 .. $]);
    }

    if (text[0] == 0xFE && text[1] == 0xFF)
        return UTF16ToUTF8!(Endian.big)(text[2 .. $]);

    if (atLeast4 && text[0] == 0 && text[1] == 0 && text[2] == 0xFE && text[3] == 0xFF)
        return UTF32ToUTF8!(Endian.big)(text[4 .. $]);

    if (text.length >= 3 && text[0] == 0xEF && text[1] == 0xBB && text[2] == 0xBF)
        return text[3 .. $];

    /* There is no BOM. Make use of Arcane Jill's insight that
     * the first char of D source must be ASCII to
     * figure out the encoding.
     */
    if (atLeast4)
    {
        if (text[1] == 0 && text[2] == 0 && text[3] == 0)
            return UTF32ToUTF8!(Endian.little)(text);
        if (text[0] == 0 && text[1] == 0 && text[2] == 0)
            return UTF32ToUTF8!(Endian.big)(text);
    }

    // try to check for UTF-16 (at least two bytes are known to be present)
    if (text[1] == 0)
        return UTF16ToUTF8!(Endian.little)(text);
    if (text[0] == 0)
        return UTF16ToUTF8!(Endian.big)(text);

    // It's UTF-8
    if (text[0] < 0x80)
        return text;

    auto loc = mod.getLoc();
    .error(loc, "%s `%s` source file must start with BOM or ASCII character, not \\x%02X", mod.kind, mod.toPrettyChars, text[0]);
    return null;
}