/**
 * Defines a package and module.
 *
 * Specification: $(LINK2 https://dlang.org/spec/module.html, Modules)
 *
 * Copyright:   Copyright (C) 1999-2023 by The D Language Foundation, All Rights Reserved
 * Authors:     $(LINK2 https://www.digitalmars.com, Walter Bright)
 * License:     $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
 * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/dmodule.d, _dmodule.d)
 * Documentation:  https://dlang.org/phobos/dmd_dmodule.html
 * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/dmodule.d
 */

module dmd.dmodule;

import core.stdc.stdio;
import core.stdc.stdlib;
import core.stdc.string;
import dmd.aggregate;
import dmd.arraytypes;
import dmd.astcodegen;
import dmd.astenums;
import dmd.compiler;
import dmd.gluelayer;
import dmd.dimport;
import dmd.dmacro;
import dmd.doc;
import dmd.dscope;
import dmd.dsymbol;
import dmd.dsymbolsem;
import dmd.errors;
import dmd.errorsink;
import dmd.expression;
import dmd.expressionsem;
import dmd.file_manager;
import dmd.globals;
import dmd.id;
import dmd.identifier;
import dmd.location;
import dmd.parse;
import dmd.cparse;
import dmd.root.array;
import dmd.root.file;
import dmd.root.filename;
import dmd.common.outbuffer;
import dmd.root.port;
import dmd.root.rmem;
import dmd.root.rootobject;
import dmd.root.string;
import dmd.semantic2;
import dmd.semantic3;
import dmd.target;
import dmd.utils;
import dmd.visitor;

/**
 * Runs `semantic3` on a module and then, recursively, on the modules it imports.
 *
 * Does nothing for a `null` module or for one whose `semanticRun` is already
 * past `PASS.semantic3`.
 *
 * Params:
 *      m = the module to process, may be `null`
 */
void semantic3OnDependencies(Module m)
{
    if (!m)
        return;

    if (m.semanticRun > PASS.semantic3)
        return;

    m.semantic3(null);

    // NOTE(review): iteration deliberately starts at index 1; presumably
    // aimports[0] is the implicit `object` import - confirm.
    foreach (i; 1 .. m.aimports.length)
        semantic3OnDependencies(m.aimports[i]);
}

/**
 * Remove generated .di files on error and exit
 *
 * Header files are only removed when header generation is enabled
 * (`params.dihdr.doOutput`); modules that are themselves `.di` files are skipped.
 * Always ends by calling `fatal()`.
 *
 * Params:
 *      params  = compiler parameters, consulted for `dihdr.doOutput`
 *      modules = the modules whose `hdrfile` is to be removed
 */
void removeHdrFilesAndFail(ref Param params, ref Modules modules) nothrow
{
    if (params.dihdr.doOutput)
    {
        foreach (m; modules)
        {
            if (m.filetype == FileType.dhdr)
                continue;
            File.remove(m.hdrfile.toChars());
        }
    }

    fatal();
}

/**
 * Converts a chain of identifiers to the filename of the module
 *
 * Each package name becomes a directory component. If
 * `global.params.modFileAliasStrings` is not empty, the dotted name built so far
 * is compared against the left-hand side of every `name=path` entry and, on a
 * match, the path accumulated so far is replaced by the right-hand side.
 *
 * Params:
 *  packages = the names of the "parent" packages
 *  ident = the name of the child package or module
 *
 * Returns:
 *  the filename of the child package or module
 */
private const(char)[] getFilename(Identifier[] packages, Identifier ident) nothrow
{
    const(char)[] filename = ident.toString();

    OutBuffer buf;      // the path being assembled
    OutBuffer dotmods;  // the dotted module name seen so far, e.g. "pkg1.pkg2."
    auto modAliases = &global.params.modFileAliasStrings;

    // Fast path: nothing to prepend and nothing to substitute
    if (packages.length == 0 && modAliases.length == 0)
        return filename;

    void checkModFileAlias(const(char)[] p)
    {
        /* Check and replace the contents of buf[] with
         * an alias string from global.params.modFileAliasStrings[]
         */
        dotmods.writestring(p);
        foreach_reverse (const m; *modAliases)
        {
            const q = strchr(m, '=');
            assert(q);
            if (dotmods.length == q - m && memcmp(dotmods.peekChars(), m, q - m) == 0)
            {
                buf.setsize(0);
                auto rhs = q[1 .. strlen(q)];
                if (rhs.length > 0 && (rhs[$ - 1] == '/' || rhs[$ - 1] == '\\'))
                    rhs = rhs[0 .. $ - 1]; // remove trailing separator
                buf.writestring(rhs);
                break; // last matching entry in modFileAliasStrings[] wins
            }
        }
        dotmods.writeByte('.');
    }

    foreach (pid; packages)
    {
        const p = pid.toString();
        buf.writestring(p);
        if (modAliases.length)
            checkModFileAlias(p);
        version (Windows)
            enum FileSeparator = '\\';
        else
            enum FileSeparator = '/';
        buf.writeByte(FileSeparator);
    }
    buf.writestring(filename);
    if (modAliases.length)
        checkModFileAlias(filename);
    // Write a terminating 0 but exclude it from the returned slice
    buf.writeByte(0);
    filename = buf.extractSlice()[0 .. $ - 1];

    return filename;
}

/***********************************************************
 * A package: a named scope whose symbol table holds sub-packages and modules.
 * A `Package` may additionally stand for a `package.d` module, in which case
 * `isPkgMod == PKG.module_` and `mod` refers to that module.
 */
extern (C++) class Package : ScopeDsymbol
{
    PKG isPkgMod = PKG.unknown;
    uint tag;        // auto incremented tag, used to mask package tree in scopes
    Module mod;     // !=null if isPkgMod == PKG.module_

    /// Creates the package and assigns it the next value of a global tag counter.
    final extern (D) this(const ref Loc loc, Identifier ident) nothrow
    {
        super(loc, ident);
        __gshared uint packageTag;
        this.tag = packageTag++;
    }

    override const(char)* kind() const nothrow
    {
        return "package";
    }

    /**
     * Returns: `true` if `o` is this very object, or a `Package` with an equal
     * identifier for which `isModule()` yields the same result.
     */
    override bool equals(const RootObject o) const
    {
        // custom 'equals' for bug 17441. "package a" and "module a" are not equal
        if (this == o)
            return true;
        auto p = cast(Package)o;
        return p && isModule() == p.isModule() && ident.equals(p.ident);
    }

    /****************************************************
     * Walks (and creates where missing) the chain of packages named by
     * `packages`, starting from `Module.modules`.
     *
     * If one of the names resolves to a module, the walk stops there and
     * `*ppkg` is set to that module so the caller can report the conflict.
     *
     * Input:
     *      packages[]      the pkg1.pkg2 of pkg1.pkg2.mod
     * Returns:
     *      the symbol table that mod should be inserted into
     * Output:
     *      *pparent        the rightmost package, i.e. pkg2, or NULL if no packages
     *      *ppkg           the leftmost package, i.e. pkg1, or NULL if no packages
     */
    extern (D) static DsymbolTable resolve(Identifier[] packages, Dsymbol* pparent, Package* ppkg)
    {
        DsymbolTable dst = Module.modules;
        Dsymbol parent = null;
        //printf("Package::resolve()\n");
        if (ppkg)
            *ppkg = null;
        foreach (pid; packages)
        {
            Package pkg;
            Dsymbol p = dst.lookup(pid);
            if (!p)
            {
                pkg = new Package(Loc.initial, pid);
                dst.insert(pkg);
                pkg.parent = parent;
                pkg.symtab = new DsymbolTable();
            }
            else
            {
                pkg = p.isPackage();
                assert(pkg);
                // It might already be a module, not a package, but that needs
                // to be checked at a higher level, where a nice error message
                // can be generated.
                // dot net needs modules and packages with same name
                // But we still need a symbol table for it
                if (!pkg.symtab)
                    pkg.symtab = new DsymbolTable();
            }
            parent = pkg;
            dst = pkg.symtab;
            // Only the first (leftmost) package is recorded here
            if (ppkg && !*ppkg)
                *ppkg = pkg;
            if (pkg.isModule())
            {
                // Return the module so that a nice error message can be generated
                if (ppkg)
                    *ppkg = cast(Package)p;
                break;
            }
        }

        if (pparent)
            *pparent = parent;
        return dst;
    }

    override final inout(Package) isPackage() inout
    {
        return this;
    }

    /**
     * Checks if pkg is a sub-package of this
     *
     * For example, if this qualifies to 'a1.a2' and pkg - to 'a1.a2.a3',
     * this function returns 'true'. If it is other way around or qualified
     * package paths conflict function returns 'false'.
     *
     * A package is also considered an ancestor of itself.
     *
     * Params:
     *  pkg = possible subpackage
     *
     * Returns:
     *  see description
     */
    final bool isAncestorPackageOf(const Package pkg) const
    {
        if (this == pkg)
            return true;
        if (!pkg || !pkg.parent)
            return false;
        return isAncestorPackageOf(pkg.parent.isPackage());
    }

    /**
     * Searches this package for `ident`.
     *
     * `SearchLocalsOnly` is always cleared from `flags`. For a package that is
     * not itself a module but has an associated `mod`, the package's own symbol
     * table is consulted first and the search then falls through to `mod`.
     */
    override Dsymbol search(const ref Loc loc, Identifier ident, int flags = SearchLocalsOnly)
    {
        //printf("%s Package.search('%s', flags = x%x)\n", toChars(), ident.toChars(), flags);
        flags &= ~SearchLocalsOnly;  // searching an import is always transitive
        if (!isModule() && mod)
        {
            // Prefer full package name.
            Dsymbol s = symtab ? symtab.lookup(ident) : null;
            if (s)
                return s;
            //printf("[%s] through pkdmod: %s\n", loc.toChars(), toChars());
            return mod.search(loc, ident, flags);
        }
        return ScopeDsymbol.search(loc, ident, flags);
    }

    override void accept(Visitor v)
    {
        v.visit(this);
    }

    /// Returns: `mod` if `isPkgMod == PKG.module_`, otherwise `null`
    final Module isPackageMod()
    {
        if (isPkgMod == PKG.module_)
        {
            return mod;
        }
        return null;
    }

    /**
     * Checks for the existence of a package.d to set isPkgMod appropriately
     * if isPkgMod == PKG.unknown
     *
     * When a matching source file is found it is loaded via `Module.load`;
     * otherwise `isPkgMod` becomes `PKG.package_`.
     */
    final void resolvePKGunknown()
    {
        if (isModule())
            return;
        if (isPkgMod != PKG.unknown)
            return;

        // Collect the parent chain innermost-first, then flip it to outermost-first
        Identifier[] packages;
        for (Dsymbol s = this.parent; s; s = s.parent)
            packages ~= s.ident;
        reverse(packages);

        if (Module.find(getFilename(packages, ident)))
            Module.load(Loc.initial, packages, this.ident);
        else
            isPkgMod = PKG.package_;
    }
}

/***********************************************************
 * A module: a `Package` backed by a source file.
 */
extern (C++) final class Module : Package
{
    extern (C++) __gshared Module rootModule;
    extern (C++) __gshared DsymbolTable modules; // symbol table of all modules
    extern (C++) __gshared Modules amodules; // array of all modules
    extern (C++) __gshared Dsymbols deferred; // deferred Dsymbol's needing semantic() run on them
    extern (C++) __gshared Dsymbols deferred2; // deferred Dsymbol's needing semantic2() run on them
    extern (C++) __gshared Dsymbols deferred3; // deferred Dsymbol's needing semantic3() run on them

    /// Allocates the global symbol table of all modules.
    static void _init()
    {
        modules = new DsymbolTable();
    }

    /**
     * Deinitializes the global state of the compiler.
     *
     * This can be used to restore the state set by `_init` to its original
     * state.
     */
    static void deinitialize()
    {
        modules = modules.init;
    }

    extern (C++) __gshared AggregateDeclaration moduleinfo;

    const(char)[] arg;           // original argument name
    ModuleDeclaration* md;      // if !=null, the contents of the ModuleDeclaration declaration
    const FileName srcfile;     // input source file
    const FileName objfile;     // output .obj file
    const FileName hdrfile;     // 'header' file
    FileName docfile;           // output documentation file
    const(ubyte)[] src;         /// Raw content of the file
    uint errors;                // if any errors in file
    uint numlines;              // number of lines in source file
    FileType filetype;          // source file type
    bool hasAlwaysInlines;      // contains references to functions that must be inlined
    bool isPackageFile;         // if it is a package.d
    Package pkg;                // if isPackageFile is true, the Package that contains this package.d
    Strings contentImportedFiles; // array of files whose content was imported
    int needmoduleinfo;
    private ThreeState selfimports;
    private ThreeState rootimports;
    Dsymbol[void*] tagSymTab;   /// ImportC: tag symbols that conflict with other symbols used as the index

    private OutBuffer defines;  // collect all the #define lines here


    /*************************************
     * Return true if module imports itself.
     *
     * The answer is computed once and cached in `selfimports`. The `insearch`
     * flag of every module in `amodules` is reset before and after the
     * computation.
     */
    bool selfImports()
    {
        //printf("Module::selfImports() %s\n", toChars());
        if (selfimports == ThreeState.none)
        {
            foreach (Module m; amodules)
                m.insearch = false;
            selfimports = imports(this) ? ThreeState.yes : ThreeState.no;
            foreach (Module m; amodules)
                m.insearch = false;
        }
        return selfimports == ThreeState.yes;
    }

    /*************************************
     * Return true if module imports root module.
     *
     * The answer is computed once and cached in `rootimports`. The `insearch`
     * flag of every module in `amodules` is reset before and after the
     * computation.
     */
    bool rootImports()
    {
        //printf("Module::rootImports() %s\n", toChars());
        if (rootimports == ThreeState.none)
        {
            foreach (Module m; amodules)
                m.insearch = false;
            rootimports = ThreeState.no;
            foreach (Module m; amodules)
            {
                if (m.isRoot() && imports(m))
                {
                    rootimports = ThreeState.yes;
                    break;
                }
            }
            foreach (Module m; amodules)
                m.insearch = false;
        }
        return rootimports == ThreeState.yes;
    }

    private Identifier searchCacheIdent;
    private Dsymbol searchCacheSymbol;  // cached value of search
    private int searchCacheFlags;       // cached flags
    private bool insearch;

    /**
     * A root module is one that will be compiled all the way to
     * object code.  This field holds the root module that caused
     * this module to be loaded.  If this module is a root module,
     * then it will be set to `this`.  This is used to determine
     * ownership of template instantiation.
     */
    Module importedFrom;

    Dsymbols* decldefs;         // top level declarations for this Module

    Modules aimports;           // all imported modules

    uint debuglevel;            // debug level
    Identifiers* debugids;      // debug identifiers
    Identifiers* debugidsNot;   // forward referenced debug identifiers

    uint versionlevel;          // version level
    Identifiers* versionids;    // version identifiers
    Identifiers* versionidsNot; // forward referenced version identifiers

    MacroTable macrotable;      // document comment macros
    Escape* _escapetable;       // document comment escapes

    size_t nameoffset;          // offset of module name from start of ModuleInfo
    size_t namelen;             // length of module name in characters

    /**
     * Creates a module for `filename` and derives its source, object, and
     * (optionally) documentation and header file names.
     *
     * Issues an error and calls `fatal()` if the source file name does not end
     * in one of the accepted extensions.
     *
     * Params:
     *      loc          = location the module is created from
     *      filename     = file name as given; stored in `arg`
     *      ident        = module identifier
     *      doDocComment = non-zero to set up `docfile`
     *      doHdrGen     = non-zero to set up `hdrfile`
     */
    extern (D) this(const ref Loc loc, const(char)[] filename, Identifier ident, int doDocComment, int doHdrGen)
    {
        super(loc, ident);
        const(char)[] srcfilename;
        //printf("Module::Module(filename = '%.*s', ident = '%s')\n", cast(int)filename.length, filename.ptr, ident.toChars());
        this.arg = filename;
        srcfilename = FileName.defaultExt(filename, mars_ext);
        if (target.run_noext && global.params.run &&
            !FileName.ext(filename) &&
            FileName.exists(srcfilename) == 0 &&
            FileName.exists(filename) == 1)
        {
            FileName.free(srcfilename.ptr);
            srcfilename = FileName.removeExt(filename); // just does a mem.strdup(filename)
        }
        else if (!FileName.equalsExt(srcfilename, mars_ext) &&
                 !FileName.equalsExt(srcfilename, hdr_ext) &&
                 !FileName.equalsExt(srcfilename, c_ext) &&
                 !FileName.equalsExt(srcfilename, i_ext) &&
                 !FileName.equalsExt(srcfilename, dd_ext))
        {

            error("source file name '%.*s' must have .%.*s extension",
                  cast(int)srcfilename.length, srcfilename.ptr,
                  cast(int)mars_ext.length, mars_ext.ptr);
            fatal();
        }

        srcfile = FileName(srcfilename);
        objfile = setOutfilename(global.params.objname, global.params.objdir, filename, target.obj_ext);
        if (doDocComment)
            setDocfile();
        if (doHdrGen)
            hdrfile = setOutfilename(global.params.dihdr.name, global.params.dihdr.dir, arg, hdr_ext);
    }

    /// ditto, using `Loc.initial` as the location
    extern (D) this(const(char)[] filename, Identifier ident, int doDocComment, int doHdrGen)
    {
        this(Loc.initial, filename, ident, doDocComment, doHdrGen);
    }

    /// Factory taking a C string; forwards to the D-slice overload.
    static Module create(const(char)* filename, Identifier ident, int doDocComment, int doHdrGen)
    {
        return create(filename.toDString, ident, doDocComment, doHdrGen);
    }

    /// Factory: constructs a new `Module` located at `Loc.initial`.
    extern (D) static Module create(const(char)[] filename, Identifier ident, int doDocComment, int doHdrGen)
    {
        return new Module(Loc.initial, filename, ident, doDocComment, doHdrGen);
    }

    /// C-string overload of `find`; forwards to the D-slice overload.
    static const(char)* find(const(char)* filename)
    {
        return find(filename.toDString).ptr;
    }

    /**
     * Asks `global.fileManager` to look for the source file `filename`,
     * passing the entries of `global.path` (if any) as search path.
     *
     * Returns: the result of `lookForSourceFile`; callers in this file treat an
     * empty result as "not found"
     */
    extern (D) static const(char)[] find(const(char)[] filename)
    {
        return global.fileManager.lookForSourceFile(filename, global.path ? (*global.path)[] : null);
    }

    /// Overload taking an `Identifiers*`; forwards to the slice overload.
    extern (C++) static Module load(const ref Loc loc, Identifiers* packages, Identifier ident)
    {
        return load(loc, packages ? (*packages)[] : null, ident);
    }

    /**
     * Locates, reads and parses the module `packages.ident`.
     *
     * Params:
     *      loc      = location of the import that triggers the load
     *      packages = names of the enclosing packages, outermost first
     *      ident    = name of the module
     *
     * Returns: the parsed module, or `null` if reading or parsing failed
     */
    extern (D) static Module load(const ref Loc loc, Identifier[] packages, Identifier ident)
    {
        //printf("Module::load(ident = '%s')\n", ident.toChars());
        // Build module filename by turning:
        //  foo.bar.baz
        // into:
        //  foo\bar\baz
        const(char)[] filename = getFilename(packages, ident);
        // Look for the source file
        if (const result = find(filename))
            filename = result; // leaks

        auto m = new Module(loc, filename, ident, 0, 0);

        if (!m.read(loc))
            return null;
        if (global.params.verbose)
        {
            OutBuffer buf;
            foreach (pid; packages)
            {
                buf.writestring(pid.toString());
                buf.writeByte('.');
            }
            buf.printf("%s\t(%s)", ident.toChars(), m.srcfile.toChars());
            message("import %s", buf.peekChars());
        }
        // parse() may hand back a different, previously parsed module
        if((m = m.parse()) is null) return null;

        return m;
    }

    override const(char)* kind() const
    {
        return "module";
    }

    /*********************************************
     * Combines things into output file name for .html and .di files.
     * Input:
     *      name    Command line name given for the file, NULL if none
     *      dir     Command line directory given for the file, NULL if none
     *      arg     Name of the source file
     *      ext     File name extension to use if 'name' is NULL
     *      global.params.preservePaths     get output path from arg
     *      srcfile Input file - output file name must not match input file
     *
     * Issues an error and calls `fatal()` if the resulting name equals `srcfile`.
     */
    extern(D) FileName setOutfilename(const(char)[] name, const(char)[] dir, const(char)[] arg, const(char)[] ext)
    {
        const(char)[] docfilename;
        if (name)
        {
            docfilename = name;
        }
        else
        {
            const(char)[] argdoc;
            OutBuffer buf;
            if (arg == "__stdin.d")
            {
                // Make the name derived from stdin input unique per process
                version (Posix)
                    import core.sys.posix.unistd : getpid;
                else version (Windows)
                    import core.sys.windows.winbase : getpid = GetCurrentProcessId;
                buf.printf("__stdin_%d.d", getpid());
                arg = buf[];
            }
            if (global.params.preservePaths)
                argdoc = arg;
            else
                argdoc = FileName.name(arg);
            // If argdoc doesn't have an absolute path, make it relative to dir
            if (!FileName.absolute(argdoc))
            {
                //FileName::ensurePathExists(dir);
                argdoc = FileName.combine(dir, argdoc);
            }
            docfilename = FileName.forceExt(argdoc, ext);
        }
        if (FileName.equals(docfilename, srcfile.toString()))
        {
            error("source file and output file have same name '%s'", srcfile.toChars());
            fatal();
        }
        return FileName(docfilename);
    }

    /// Sets `docfile` from the `-D` related parameters in `global.params.ddoc`.
    extern (D) void setDocfile()
    {
        docfile = setOutfilename(global.params.ddoc.name, global.params.ddoc.dir, arg, doc_ext);
    }

    /**
     * Trigger the relevant semantic error when a file cannot be read
     *
     * We special case `object.d` as a failure is likely to be a rare
     * but difficult to diagnose case for the user. Packages also require
     * special handling to avoid exposing the compiler's internals.
     *
     * Unless errors are gagged (`global.gag`), the import paths are printed to
     * `stderr` and `removeHdrFilesAndFail` is called.
     *
     * Params:
     *  loc = The location at which the file read originated (e.g. import)
     */
    private void onFileReadError(const ref Loc loc)
    {
        if (FileName.equals(srcfile.toString(), "object.d"))
        {
            .error(loc, "cannot find source code for runtime library file 'object.d'");
            errorSupplemental(loc, "dmd might not be correctly installed. Run 'dmd -man' for installation instructions.");
            const dmdConfFile = global.inifilename.length ? FileName.canonicalName(global.inifilename) : "not found";
            errorSupplemental(loc, "config file: %.*s", cast(int)dmdConfFile.length, dmdConfFile.ptr);
        }
        else if (FileName.ext(this.arg) || !loc.isValid())
        {
            // Modules whose original argument name has an extension, or do not
            // have a valid location come from the command-line.
            // Error that their file cannot be found and return early.
            .error(loc, "cannot find input file `%s`", srcfile.toChars());
        }
        else
        {
            // if module is not named 'package' but we're trying to read 'package.d', we're looking for a package module
            bool isPackageMod = (strcmp(toChars(), "package") != 0) && isPackageFileName(srcfile);
            if (isPackageMod)
                .error(loc, "importing package '%s' requires a 'package.d' file which cannot be found in '%s'", toChars(), srcfile.toChars());
            else
            {
                .error(loc, "unable to read module `%s`", toChars());
                const pkgfile = FileName.combine(FileName.removeExt(srcfile.toString()), package_d);
                .errorSupplemental(loc, "Expected '%s' or '%s' in one of the following import paths:",
                    srcfile.toChars(), pkgfile.ptr);
            }
        }
        if (!global.gag)
        {
            /* Print path
             */
            if (global.path)
            {
                foreach (i, p; *global.path)
                    fprintf(stderr, "import path[%llu] = %s\n", cast(ulong)i, p);
            }
            else
            {
                fprintf(stderr, "Specify path to file '%s' with -I switch\n", srcfile.toChars());
            }

            removeHdrFilesAndFail(global.params, Module.amodules);
        }
    }

    /**
     * Reads the file from `srcfile` and loads the source buffer.
     *
     * If makefile module dependency is requested, we add this module
     * to the list of dependencies from here.
     *
     * Params:
     *  loc = the location
     *
     * Returns: `true` if successful
     */
    bool read(const ref Loc loc)
    {
        if (this.src)
            return true; // already read

        //printf("Module::read('%s') file '%s'\n", toChars(), srcfile.toChars());

        /* Preprocess the file if it's a .c file
         */
        FileName filename = srcfile;
        bool ifile = false;             // did we generate a .i file
        scope (exit)
        {
            if (ifile)
                File.remove(filename.toChars());        // remove generated file
        }

        if (global.preprocess &&
            FileName.equalsExt(srcfile.toString(), c_ext) &&
            FileName.exists(srcfile.toString()))
        {
            filename = global.preprocess(srcfile, loc, ifile, &defines);  // run C preprocessor
        }

        if (auto result = global.fileManager.lookup(filename))
        {
            this.src = result;
            if (global.params.makeDeps.doOutput)
                global.params.makeDeps.files.push(srcfile.toChars());
            return true;
        }

        this.onFileReadError(loc);
        return false;
    }

    /// syntactic parse
    Module parse()
    {
        return parseModule!ASTCodegen();
    }

    /**
     * Syntactic parse, parameterized on the AST family.
     *
     * Besides parsing, this determines the file type, resolves the package
     * path of a module declaration and inserts the module (or its wrapping
     * `Package` for a `package.d`) into the symbol table.
     *
     * Returns: `this` on success; the previously registered module if one with
     * the same name is already in the symbol table; `null` if the source could
     * not be converted by `processSource`
     */
    extern (D) Module parseModule(AST)()
    {
        const(char)* srcname = srcfile.toChars();
        //printf("Module::parse(srcname = '%s')\n", srcname);
        isPackageFile = isPackageFileName(srcfile);
        const(char)[] buf = processSource(src, this);
        // an error happened on UTF conversion
        if (buf is null) return null;

        /* If it starts with the string "Ddoc", then it's a documentation
         * source file.
         */
        if (buf.length>= 4 && buf[0..4] == "Ddoc")
        {
            comment = buf.ptr + 4;
            filetype = FileType.ddoc;
            if (!docfile)
                setDocfile();
            return this;
        }
        /* If it has the extension ".dd", it is also a documentation
         * source file. Documentation source files may begin with "Ddoc"
         * but do not have to if they have the .dd extension.
         * https://issues.dlang.org/show_bug.cgi?id=15465
         */
        if (FileName.equalsExt(arg, dd_ext))
        {
            comment = buf.ptr; // the optional Ddoc, if present, is handled above.
            filetype = FileType.ddoc;
            if (!docfile)
                setDocfile();
            return this;
        }
        /* If it has the extension ".di", it is a "header" file.
         */
        if (FileName.equalsExt(arg, hdr_ext))
            filetype = FileType.dhdr;

        /// Promote `this` to a root module if requested via `-i`
        void checkCompiledImport()
        {
            if (!this.isRoot() && Compiler.onImport(this))
                this.importedFrom = this;
        }

        DsymbolTable dst;
        Package ppack = null;

        /* If it has the extension ".c", it is a "C" file.
         * If it has the extension ".i", it is a preprocessed "C" file.
         */
        if (FileName.equalsExt(arg, c_ext) || FileName.equalsExt(arg, i_ext))
        {
            filetype = FileType.c;

            scope p = new CParser!AST(this, buf, cast(bool) docfile, global.errorSink, target.c, &defines, &global.compileEnv);
            p.nextToken();
            checkCompiledImport();
            members = p.parseModule();
            assert(!p.md); // C doesn't have module declarations
            numlines = p.scanloc.linnum;
        }
        else
        {
            const bool doUnittests = global.params.useUnitTests || global.params.ddoc.doOutput || global.params.dihdr.doOutput;
            scope p = new Parser!AST(this, buf, cast(bool) docfile, global.errorSink, &global.compileEnv, doUnittests);
            p.transitionIn = global.params.vin;
            p.nextToken();
            p.parseModuleDeclaration();
            md = p.md;

            if (md)
            {
                /* A ModuleDeclaration, md, was provided.
                 * The ModuleDeclaration sets the packages this module appears in, and
                 * the name of this module.
                 */
                this.ident = md.id;
                dst = Package.resolve(md.packages, &this.parent, &ppack);
            }

            // Done after parsing the module header because `module x.y.z` may override the file name
            checkCompiledImport();

            members = p.parseModuleContent();
            numlines = p.scanloc.linnum;
        }

        /* The symbol table into which the module is to be inserted.
         */

        if (md)
        {
            // Mark the package path as accessible from the current module
            // https://issues.dlang.org/show_bug.cgi?id=21661
            // Code taken from Import.addPackageAccess()
            if (md.packages.length > 0)
            {
                // module a.b.c.d;
                auto p = ppack; // a
                addAccessiblePackage(p, Visibility(Visibility.Kind.private_));
                foreach (id; md.packages[1 .. $]) // [b, c]
                {
                    p = cast(Package) p.symtab.lookup(id);
                    if (p is null)
                        break;
                    addAccessiblePackage(p, Visibility(Visibility.Kind.private_));
                }
            }
            assert(dst);
            Module m = ppack ? ppack.isModule() : null;
            if (m && !isPackageFileName(m.srcfile))
            {
                .error(md.loc, "package name '%s' conflicts with usage as a module name in file %s", ppack.toPrettyChars(), m.srcfile.toChars());
            }
        }
        else
        {
            /* The name of the module is set to the source file name.
             * There are no packages.
             */
            dst = modules; // and so this module goes into global module symbol table
            /* Check to see if module name is a valid identifier
             */
            if (!Identifier.isValidIdentifier(this.ident.toChars()))
                error("has non-identifier characters in filename, use module declaration instead");
        }
        // Insert module into the symbol table
        Dsymbol s = this;
        if (isPackageFile)
        {
            /* If the source tree is as follows:
             *     pkg/
             *     +- package.d
             *     +- common.d
             * the 'pkg' will be incorporated to the internal package tree in two ways:
             *     import pkg;
             * and:
             *     import pkg.common;
             *
             * If both are used in one compilation, 'pkg' as a module (== pkg/package.d)
             * and a package name 'pkg' will conflict each other.
             *
             * To avoid the conflict:
             * 1. If preceding package name insertion had occurred by Package::resolve,
             *    reuse the previous wrapping 'Package' if it exists
             * 2. Otherwise, 'package.d' wrapped by 'Package' is inserted to the internal tree in here.
             *
             * Then change Package::isPkgMod to PKG.module_ and set Package::mod.
             *
             * Note that the 'wrapping Package' is the Package that contains package.d and other submodules,
             * the one inserted to the symbol table.
             */
            auto ps = dst.lookup(ident);
            Package p = ps ? ps.isPackage() : null;
            if (p is null)
            {
                p = new Package(Loc.initial, ident);
                p.tag = this.tag; // reuse the same package tag
                p.symtab = new DsymbolTable();
            }
            this.tag = p.tag; // reuse the 'older' package tag
            this.pkg = p;
            p.parent = this.parent;
            p.isPkgMod = PKG.module_;
            p.mod = this;
            s = p;
        }
        if (!dst.insert(s))
        {
            /* It conflicts with a name that is already in the symbol table.
             * Figure out what went wrong, and issue error message.
             */
            Dsymbol prev = dst.lookup(ident);
            assert(prev);
            if (Module mprev = prev.isModule())
            {
                if (!FileName.equals(srcname, mprev.srcfile.toChars()))
                    error(loc, "from file %s conflicts with another module %s from file %s", srcname, mprev.toChars(), mprev.srcfile.toChars());
                else if (isRoot() && mprev.isRoot())
                    error(loc, "from file %s is specified twice on the command line", srcname);
                else
                    error(loc, "from file %s must be imported with 'import %s;'", srcname, toPrettyChars());
                // https://issues.dlang.org/show_bug.cgi?id=14446
                // Return previously parsed module to avoid AST duplication ICE.
                return mprev;
            }
            else if (Package pkg = prev.isPackage())
            {
                // 'package.d' loaded after a previous 'Package' insertion
                if (isPackageFile)
                    amodules.push(this); // Add to global array of all modules
                else
                    error(md ? md.loc : loc, "from file %s conflicts with package name %s", srcname, pkg.toChars());
            }
            else
                assert(global.errors);
        }
        else
        {
            // Add to global array of all modules
            amodules.push(this);
        }
        Compiler.onParseModule(this);
        return this;
    }

    /**
     * Creates the module's root scope, adds the implicit `import object;`,
     * populates the module's symbol table and runs `setScope` and `importAll`
     * on every member.
     *
     * Does nothing if the module already has a scope; reports an error for
     * Ddoc files.
     *
     * Params:
     *      prevsc = ignored, modules always get a fresh scope
     */
    override void importAll(Scope* prevsc)
    {
        //printf("+Module::importAll(this = %p, '%s'): parent = %p\n", this, toChars(), parent);
        if (_scope)
            return; // already done
        if (filetype == FileType.ddoc)
        {
            error("is a Ddoc file, cannot import it");
            return;
        }

        /* Note that modules get their own scope, from scratch.
         * This is so regardless of where in the syntax a module
         * gets imported, it is unaffected by context.
         * Ignore prevsc.
         */
        Scope* sc = Scope.createGlobal(this); // create root scope

        if (md && md.msg)
            md.msg = semanticString(sc, md.msg, "deprecation message");

        // Add import of "object", even for the "object" module.
        // If it isn't there, some compiler rewrites, like
        //    classinst == classinst -> .object.opEquals(classinst, classinst)
        // would fail inside object.d.
        if (filetype != FileType.c &&
            (members.length == 0 ||
             (*members)[0].ident != Id.object ||
             (*members)[0].isImport() is null))
        {
            auto im = new Import(Loc.initial, null, Id.object, null, 0);
            members.shift(im);
        }
        if (!symtab)
        {
            // Add all symbols into module's symbol table
            symtab = new DsymbolTable();
            for (size_t i = 0; i < members.length; i++)
            {
                Dsymbol s = (*members)[i];
                s.addMember(sc, sc.scopesym);
            }
        }
        // anything else should be run after addMember, so version/debug symbols are defined
        /* Set scope for the symbols so that if we forward reference
         * a symbol, it can possibly be resolved on the spot.
         * If this works out well, it can be extended to all modules
         * before any semantic() on any of them.
         */
        setScope(sc); // remember module scope for semantic
        for (size_t i = 0; i < members.length; i++)
        {
            Dsymbol s = (*members)[i];
            s.setScope(sc);
        }
        for (size_t i = 0; i < members.length; i++)
        {
            Dsymbol s = (*members)[i];
            s.importAll(sc);
        }
        sc = sc.pop();
        sc.pop(); // 2 pops because Scope.createGlobal() created 2
    }

    /**********************************
     * Determine if we need to generate an instance of ModuleInfo
     * for this Module.
     *
     * Returns: non-zero if `needmoduleinfo` is set or coverage
     * (`global.params.cov`) is enabled
     */
    int needModuleInfo()
    {
        //printf("needModuleInfo() %s, %d, %d\n", toChars(), needmoduleinfo, global.params.cov);
        return needmoduleinfo || global.params.cov;
    }

    /*******************************************
     * Print deprecation warning if we're deprecated, when
     * this module is imported from scope sc.
     *
     * Params:
     *  sc = the scope into which we are imported
     *  loc = the location of the import statement
     */
    void checkImportDeprecation(const ref Loc loc, Scope* sc)
    {
        if (md && md.isdeprecated && !sc.isDeprecated)
        {
            Expression msg = md.msg;
            if (StringExp se = msg ? msg.toStringExp() : null)
            {
                const slice = se.peekString();
                if (slice.length)
                {
                    deprecation(loc, "is deprecated - %.*s", cast(int)slice.length, slice.ptr);
                    return;
                }
            }
            // No (or an empty) deprecation message was supplied
            deprecation(loc, "is deprecated");
        }
    }

    /**
     * Searches the module for `ident`, guarding against re-entrancy and
     * caching the most recent result.
     *
     * Returns: the symbol found, or `null` if nothing was found or a search of
     * this module is already in progress
     */
    override Dsymbol search(const ref Loc loc, Identifier ident, int flags = SearchLocalsOnly)
    {
        /* Since modules can be circularly referenced,
         * need to stop infinite recursive searches.
         * This is done with the cache.
         */
        //printf("%s Module.search('%s', flags = x%x) insearch = %d\n", toChars(), ident.toChars(), flags, insearch);
        if (insearch)
            return null;

        /* Qualified module searches always search their imports,
         * even if SearchLocalsOnly
         */
        if (!(flags & SearchUnqualifiedModule))
            flags &= ~(SearchUnqualifiedModule | SearchLocalsOnly);

        if (searchCacheIdent == ident && searchCacheFlags == flags)
        {
            //printf("%s Module::search('%s', flags = %d) insearch = %d searchCacheSymbol = %s\n",
            //        toChars(), ident.toChars(), flags, insearch, searchCacheSymbol ? searchCacheSymbol.toChars() : "null");
            return searchCacheSymbol;
        }

        uint errors = global.errors;

        insearch = true;
        Dsymbol s = ScopeDsymbol.search(loc, ident, flags);
        insearch = false;

        if (errors == global.errors)
        {
            // https://issues.dlang.org/show_bug.cgi?id=10752
            // Can cache the result only when it does not cause
            // access error so the side-effect should be reproduced in later search.
            searchCacheIdent = ident;
            searchCacheSymbol = s;
            searchCacheFlags = flags;
        }
        return s;
    }

    /**
     * Checks whether package `p` is accessible from this module, refusing to
     * recurse while a search/check of this module is already in progress.
     *
     * NOTE(review): `flags` is consulted here but not forwarded to the base
     * implementation.
     */
    override bool isPackageAccessible(Package p, Visibility visibility, int flags = 0)
    {
        if (insearch) // don't follow import cycles
            return false;
        insearch = true;
        scope (exit)
            insearch = false;
        if (flags & IgnorePrivateImports)
            visibility = Visibility(Visibility.Kind.public_); // only consider public imports
        return super.isPackageAccessible(p, visibility);
    }

    /// Inserts `s` into the symbol table and invalidates the search cache.
    override Dsymbol symtabInsert(Dsymbol s)
    {
        searchCacheIdent = null; // symbol is inserted, so invalidate cache
        return Package.symtabInsert(s);
    }

    /// Removes the object file (if object output is enabled) and the doc file (if set).
    void deleteObjFile()
    {
        if (global.params.obj)
            File.remove(objfile.toChars());
        if (docfile)
            File.remove(docfile.toChars());
    }

    /*******************************************
     * Can't run semantic on s now, try again later.
     *
     * `s` is appended to `deferred` unless it is already queued there.
     */
    extern (D) static void addDeferredSemantic(Dsymbol s)
    {
        //printf("Module::addDeferredSemantic('%s')\n", s.toChars());
        if (!deferred.contains(s))
            deferred.push(s);
    }

    /// ditto, for the semantic2 queue `deferred2`
    extern (D) static void addDeferredSemantic2(Dsymbol s)
    {
        //printf("Module::addDeferredSemantic2('%s')\n", s.toChars());
        if (!deferred2.contains(s))
            deferred2.push(s);
    }

    /// ditto, for the semantic3 queue `deferred3`
    extern (D) static void addDeferredSemantic3(Dsymbol s)
    {
        //printf("Module::addDeferredSemantic3('%s')\n", s.toChars());
        // Check the queue we are about to push to. Testing `deferred` here
        // would let duplicates into `deferred3` and would drop `s` whenever it
        // happens to be waiting in the semantic-1 queue.
        if (!deferred3.contains(s))
            deferred3.push(s);
    }

    /******************************************
     * Run semantic() on deferred symbols.
     *
     * The queue is snapshotted and emptied before each pass, so symbols that
     * defer themselves again end up in a fresh queue. Passes repeat until the
     * queue is empty or its length stops changing. Nested invocations return
     * immediately.
     */
    static void runDeferredSemantic()
    {
        __gshared int nested;
        if (nested)
            return;
        //if (deferred.length) printf("+Module::runDeferredSemantic(), len = %ld\n", deferred.length);
        nested++;

        size_t len;
        do
        {
            len = deferred.length;
            if (!len)
                break;

            Dsymbol* todo;
            Dsymbol* todoalloc = null;
            Dsymbol tmp;
            if (len == 1)
            {
                // Avoid a heap allocation for the single-element case
                todo = &tmp;
            }
            else
            {
                todo = cast(Dsymbol*)Mem.check(malloc(len * Dsymbol.sizeof));
                todoalloc = todo;
            }
            memcpy(todo, deferred.tdata(), len * Dsymbol.sizeof);
            deferred.setDim(0);

            foreach (i; 0..len)
            {
                Dsymbol s = todo[i];
                s.dsymbolSemantic(null);
                //printf("deferred: %s, parent = %s\n", s.toChars(), s.parent.toChars());
            }
            //printf("\tdeferred.length = %ld, len = %ld\n", deferred.length, len);
            if (todoalloc)
                free(todoalloc);
        }
        while (deferred.length != len); // while making progress
        nested--;
        //printf("-Module::runDeferredSemantic(), len = %ld\n", deferred.length);
    }

    /**
     * Runs the deferred semantic() pass, then semantic2() on every symbol in
     * `deferred2`, stopping early once any error has been reported. The queue
     * is cleared afterwards.
     */
    static void runDeferredSemantic2()
    {
        Module.runDeferredSemantic();

        Dsymbols* a = &Module.deferred2;
        for (size_t i = 0; i < a.length; i++)
        {
            Dsymbol s = (*a)[i];
            //printf("[%d] %s semantic2a\n", i, s.toPrettyChars());
            s.semantic2(null);

            if (global.errors)
                break;
        }
        a.setDim(0);
    }

    /**
     * Runs the deferred semantic() and semantic2() passes, then semantic3() on
     * every symbol in `deferred3`, stopping early once any error has been
     * reported. The queue is cleared afterwards.
     */
    static void runDeferredSemantic3()
    {
        Module.runDeferredSemantic2();

        Dsymbols* a = &Module.deferred3;
        for (size_t i = 0; i < a.length; i++)
        {
            Dsymbol s = (*a)[i];
            //printf("[%d] %s semantic3a\n", i, s.toPrettyChars());
            s.semantic3(null);

            if (global.errors)
                break;
        }
        a.setDim(0);
    }

    /// Invalidates the search cache of every module in `amodules`.
    extern (D) static void clearCache() nothrow
    {
        foreach (Module m; amodules)
            m.searchCacheIdent = null;
    }

/************************************
     * Recursively look at every module this module imports,
     * return true if it imports m.
     * Can be used to detect circular imports.
     *
     * Visited modules are marked through their `insearch` flag; this
     * function sets the flag but does not clear it.
     */
    int imports(Module m) nothrow
    {
        //printf("%s Module::imports(%s)\n", toChars(), m.toChars());
        version (none)
        {
            foreach (i, Module mi; aimports)
                printf("\t[%d] %s\n", cast(int) i, mi.toChars());
        }
        foreach (Module dep; aimports)
        {
            if (dep == m)
                return true;
            if (dep.insearch)
                continue; // already marked, do not descend again
            dep.insearch = true;
            if (const found = dep.imports(m))
                return found;
        }
        return false;
    }

    /// Returns: whether this module is its own `importedFrom`
    bool isRoot() nothrow
    {
        return importedFrom == this;
    }

    /// Returns: Whether this module is in the `core` package and has name `ident`
    bool isCoreModule(Identifier ident) nothrow
    {
        if (this.ident != ident)
            return false;
        // the parent must be the top-level package named `core`
        return parent && parent.ident == Id.core && !parent.parent;
    }

    // Back end
    int doppelganger; // sub-module
    Symbol* cov; // private uint[] __coverage;
    uint* covb; // bit array of valid code line numbers
    Symbol* sictor; // module order independent constructor
    Symbol* sctor; // module constructor
    Symbol* sdtor; // module destructor
    Symbol* ssharedctor; // module shared constructor
    Symbol* sshareddtor; // module shared destructor
    Symbol* stest; // module unit test
    Symbol* sfilename; // symbol for filename

    uint[uint] ctfe_cov; /// coverage information from ctfe execution_count[line]

    /// Returns: `this`
    override inout(Module) isModule() inout nothrow
    {
        return this;
    }

    /// Dispatch to `v.visit(this)`
    override void accept(Visitor v)
    {
        v.visit(this);
    }

    /***********************************************
     * Writes this module's fully-qualified name to buf
     *
     * The module identifier is written first; then, walking up the
     * `parent` chain, each package identifier followed by "." is
     * prepended to the buffer.
     * Params:
     *    buf = The buffer to write to
     */
    void fullyQualifiedName(ref OutBuffer buf) nothrow
    {
        buf.writestring(ident.toString());

        auto pkg = parent;
        while (pkg !is null)
        {
            buf.prependstring(".");
            buf.prependstring(pkg.ident.toChars());
            pkg = pkg.parent;
        }
    }

    /** Lazily initializes and returns the escape table.
    Turns out it eats a lot of memory.
    */
    extern(D) Escape* escapetable() nothrow
    {
        if (_escapetable is null)
            _escapetable = new Escape();
        return _escapetable;
    }

    /****************************
     * A Singleton that loads core.stdc.config
     * Returns:
     *  Module of core.stdc.config, null if couldn't find it
     */
    extern (D) static Module loadCoreStdcConfig()
    {
        __gshared Module core_stdc_config;
        Identifier[] pkgids = [Id.core, Id.stdc];
        return loadModuleFromLibrary(core_stdc_config, pkgids, Id.config);
    }

    /****************************
     * A Singleton that loads core.atomic
     * Returns:
     *  Module of core.atomic, null if couldn't find it
     */
    extern (D) static Module loadCoreAtomic()
    {
        __gshared Module core_atomic;
        Identifier[] pkgids = [Id.core];
        return loadModuleFromLibrary(core_atomic, pkgids, Id.atomic);
    }

    /****************************
     * A Singleton that loads std.math
     * Returns:
     *  Module of std.math, null if couldn't find it
     */
    extern (D) static Module loadStdMath()
    {
        __gshared Module std_math;
        Identifier[] pkgids = [Id.std];
        return loadModuleFromLibrary(std_math, pkgids, Id.math);
    }

    /**********************************
     * Load a Module from the library.
     * Params:
     *  mod = cached return value of this call; returned unchanged when
     *        already set, otherwise assigned the outcome of the load
     *  pkgids = package identifiers
     *  modid = module id
     * Returns:
     *  Module loaded, null if cannot load it
     */
    extern (D) private static Module loadModuleFromLibrary(ref Module mod, Identifier[] pkgids, Identifier modid)
    {
        if (mod)
            return mod;

        auto imp = new Import(Loc.initial, pkgids[], modid, null, true);
        // Module.load will call fatal() if there's no module available.
        // Gag the error here, pushing the error handling to the caller.
        const gagged = global.startGagging();
        imp.load(null);
        if (auto loaded = imp.mod)
        {
            loaded.importAll(null);
            loaded.dsymbolSemantic(null);
        }
        global.endGagging(gagged);
        mod = imp.mod;
        return mod;
    }
}

/***********************************************************
 * The contents of a module declaration: its location, the package chain,
 * the module identifier and the deprecation state.
 */
extern (C++) struct ModuleDeclaration
{
    Loc loc;
    Identifier id;
    Identifier[] packages;  // array of Identifier's representing packages
    bool isdeprecated;      // if it is a deprecated module
    Expression msg;

    extern (D) this(const ref Loc loc, Identifier[] packages, Identifier id, Expression msg, bool isdeprecated)
    {
        this.loc = loc;
        this.id = id;
        this.packages = packages;
        this.isdeprecated = isdeprecated;
        this.msg = msg;
    }

    /// Returns: the dot-separated package chain followed by the module identifier
    extern (C++) const(char)* toChars() const
    {
        OutBuffer name;
        foreach (pkg; packages)
        {
            name.writestring(pkg.toString());
            name.writeByte('.');
        }
        name.writestring(id.toString());
        return name.extractChars();
    }

    /// Provide a human readable representation
    extern (D) const(char)[] toString() const
    {
        return toDString(this.toChars());
    }
}

/****************************************
 * Collect the local classes in the Module, suitable
 * for inclusion in ModuleInfo
 *
 * Classes whose parent is a template mixin and classes of kind
 * `ClassKind.objc` are left out.
 * Params:
 *      mod     = the Module
 *      aclasses = array the classes found are appended to
 */
extern (C++) void getLocalClasses(Module mod, ref ClassDeclarations aclasses)
{
    //printf("members.length = %d\n", mod.members.length);
    int pushAddClassDg(size_t n, Dsymbol sm)
    {
        auto cd = sm ? sm.isClassDeclaration() : null;
        if (!cd)
            return 0;

        // compatibility with previous algorithm
        if (cd.parent && cd.parent.isTemplateMixin())
            return 0;

        if (cd.classKind != ClassKind.objc)
            aclasses.push(cd);
        return 0;
    }

    ScopeDsymbol._foreach(null, mod.members, &pushAddClassDg);
}

/**
 * Process the content of a source file
 *
 * Attempts to find which encoding it is using, if it has BOM,
 * and then normalize the source to UTF-8. If no conversion is required,
 * a slice of `src` will be returned without extra allocation.
 *
 * Params:
 *  src = Content of the source file to process
 *  mod = Module matching `src`, used for error handling
 *
 * Returns:
 *   UTF-8 encoded variant of `src`, stripped of any BOM,
 *   or `null` if an error happened.
 */
private const(char)[] processSource (const(ubyte)[] src, Module mod)
{
    enum SourceEncoding { utf16, utf32}
    enum Endian { little, big}

    /*
     * Convert a buffer from UTF32 to UTF8
     * Params:
     *    endian = is the buffer big/little endian
     *    buf = buffer of UTF32 data
     * Returns:
     *    input buffer reencoded as UTF8 (with a trailing 0 byte),
     *    or null after reporting an error
     */

    char[] UTF32ToUTF8(Endian endian)(const(char)[] buf)
    {
        static if (endian == Endian.little)
            alias readNext = Port.readlongLE;
        else
            alias readNext = Port.readlongBE;

        if (buf.length & 3)
        {
            mod.error("odd length of UTF-32 char source %llu", cast(ulong) buf.length);
            return null;
        }

        const (uint)[] eBuf = cast(const(uint)[])buf;

        OutBuffer dbuf;
        dbuf.reserve(eBuf.length);

        foreach (i; 0 .. eBuf.length)
        {
            const u = readNext(&eBuf[i]);
            if ((u & ~0x7F) == 0)
            {
                // ASCII goes out as a single byte
                dbuf.writeByte(u);
                continue;
            }
            if (u > 0x10FFFF)
            {
                mod.error("UTF-32 value %08x greater than 0x10FFFF", u);
                return null;
            }
            dbuf.writeUTF8(u);
        }
        dbuf.writeByte(0); //add null terminator
        return dbuf.extractSlice();
    }

    /*
     * Convert a buffer from UTF16 to UTF8
     * Params:
     *    endian = is the buffer big/little endian
     *    buf = buffer of UTF16 data
     * Returns:
     *    input buffer reencoded as UTF8 (with a trailing 0 byte),
     *    or null after reporting an error
     */

    char[] UTF16ToUTF8(Endian endian)(const(char)[] buf)
    {
        static if (endian == Endian.little)
            alias readNext = Port.readwordLE;
        else
            alias readNext = Port.readwordBE;

        if (buf.length & 1)
        {
            mod.error("odd length of UTF-16 char source %llu", cast(ulong) buf.length);
            return null;
        }

        const (ushort)[] eBuf = cast(const(ushort)[])buf;

        OutBuffer dbuf;
        dbuf.reserve(eBuf.length);

        // i is advanced an extra step inside the loop for surrogate pairs
        for (size_t i = 0; i < eBuf.length; i++)
        {
            uint u = readNext(&eBuf[i]);
            if ((u & ~0x7F) == 0)
            {
                // ASCII goes out as a single byte
                dbuf.writeByte(u);
                continue;
            }

            if (0xD800 <= u && u < 0xDC00)
            {
                // high surrogate: the next unit must be a low surrogate
                i++;
                if (i >= eBuf.length)
                {
                    mod.error("surrogate UTF-16 high value %04x at end of file", u);
                    return null;
                }
                const u2 = readNext(&eBuf[i]);
                if (u2 < 0xDC00 || 0xE000 <= u2)
                {
                    mod.error("surrogate UTF-16 low value %04x out of range", u2);
                    return null;
                }
                u = (u - 0xD7C0) << 10;
                u |= (u2 - 0xDC00);
            }
            else if (u >= 0xDC00 && u <= 0xDFFF)
            {
                mod.error("unpaired surrogate UTF-16 value %04x", u);
                return null;
            }
            else if (u == 0xFFFE || u == 0xFFFF)
            {
                mod.error("illegal UTF-16 value %04x", u);
                return null;
            }
            dbuf.writeUTF8(u);
        }
        dbuf.writeByte(0); //add a terminating null byte
        return dbuf.extractSlice();
    }

    const(char)[] buf = cast(const(char)[]) src;

    // Assume the buffer is from memory and has not been read from disk. Assume UTF-8.
    if (buf.length < 2)
        return buf;

    // Several checks below index up to buf[3]
    const hasFour = buf.length >= 4;

    /* Convert all non-UTF-8 formats to UTF-8.
     * BOM : https://www.unicode.org/faq/utf_bom.html
     * 00 00 FE FF  UTF-32BE, big-endian
     * FF FE 00 00  UTF-32LE, little-endian
     * FE FF        UTF-16BE, big-endian
     * FF FE        UTF-16LE, little-endian
     * EF BB BF     UTF-8
     */
    if (buf[0] == 0xFF && buf[1] == 0xFE)
    {
        // FF FE is also the prefix of the UTF-32LE BOM, so test that first
        if (hasFour && buf[2] == 0 && buf[3] == 0)
            return UTF32ToUTF8!(Endian.little)(buf[4 .. $]);
        return UTF16ToUTF8!(Endian.little)(buf[2 .. $]);
    }

    if (buf[0] == 0xFE && buf[1] == 0xFF)
        return UTF16ToUTF8!(Endian.big)(buf[2 .. $]);

    if (hasFour && buf[0] == 0 && buf[1] == 0 && buf[2] == 0xFE && buf[3] == 0xFF)
        return UTF32ToUTF8!(Endian.big)(buf[4 .. $]);

    if (buf.length >= 3 && buf[0] == 0xEF && buf[1] == 0xBB && buf[2] == 0xBF)
        return buf[3 .. $];

    /* There is no BOM. Make use of Arcane Jill's insight that
     * the first char of D source must be ASCII to
     * figure out the encoding.
     */
    if (hasFour && buf[1] == 0 && buf[2] == 0 && buf[3] == 0)
        return UTF32ToUTF8!(Endian.little)(buf);
    if (hasFour && buf[0] == 0 && buf[1] == 0 && buf[2] == 0)
        return UTF32ToUTF8!(Endian.big)(buf);
    // try to check for UTF-16
    if (buf.length >= 2 && buf[1] == 0)
        return UTF16ToUTF8!(Endian.little)(buf);
    if (buf[0] == 0)
        return UTF16ToUTF8!(Endian.big)(buf);

    // It's UTF-8
    if (buf[0] >= 0x80)
    {
        mod.error("source file must start with BOM or ASCII character, not \\x%02X", buf[0]);
        return null;
    }

    return buf;
}