以前就觉得Nodejs的MooTools库很奇怪,因为用他的时候,不需要把require的返回值保存起来,今天实在憋不住,就研究了下,对NodeJs的require机制又有了几分深刻的理解。
MooTools库的“奇怪”用法:
require('mootools'); var QueryCommand = new Class({ initialize: function (product, week, action) { this.product = product; this.weeknum = week; this.action = action; }, toJsonString: function () { return JSON.stringify(this); } });
看到没,把require当作C++的include用了,之后的代码可以直接使用Class类型了。其实,MooTools的做法是把Class这个类型“附加”到了全局作用域。我觉得MooTools可能就是这么设计的,因为他是一个OO库嘛。
那么,MooTools是怎么把他自己附加到全局作用域的呢?其实很巧妙,是通过函数字面量做的:
(function(){ var Class = this.Class = new Type('Class', function(params){ if (instanceOf(params, Function)) params = {initialize: params}; var newClass = function(){ reset(this); if (newClass.$prototyping) return this; this.$caller = null; var value = (this.initialize) ? this.initialize.apply(this, arguments) : this; this.$caller = this.caller = null; return value; }.extend(this).implement(params); newClass.$constructor = Class; newClass.prototype.$constructor = newClass; newClass.prototype.parent = parent; return newClass; }); ...... })();
他通过函数字面量定义了一个匿名函数,然后立即执行之。这个是JS的惯用法。有一点要注意的是,在执行这个函数的时候,其实是没有this指针的!那么在函数内部使用this,访问的就是全局对象,就是那个始祖对象!
再看一个更加简单的例子:
app.js
require('./a.js') console.log(a);
a.js
a = 3; //输出3
注意,上面不是var a=3;所以该语句实在全局作用域上附加了a这个属性(注意,也不是this!是始祖对象),然后呢,在app.js模块就能使用了。其实,在其他的模块也能访问了。
接下来说说,为什么会污染到全局作用域,nodejs不是用require来加载每个模块的嘛,模块应该互相独立的呀,其实nodeJs的模块加载顺序是这样的:
Module._load("xxx.js") --> var module = new Module(); --> module.load("xxx.js") --> module._compile() --> 最终调用的是被wrapper的模块代码,上面那个例子的话,最终执行的是:
(function (exports, require, module, __filename, __dirname) { a = 3; });
看一下module.compile()的代码就清楚了:
// Returns exception if any Module.prototype._compile = function(content, filename) { var self = this; // remove shebang content = content.replace(/^#!.*/, ''); function require(path) { return self.require(path); } require.resolve = function(request) { return Module._resolveFilename(request, self); }; Object.defineProperty(require, 'paths', { get: function() { throw new Error('require.paths is removed. Use ' + 'node_modules folders, or the NODE_PATH ' + 'environment variable instead.'); }}); require.main = process.mainModule; // Enable support to add extra extension types require.extensions = Module._extensions; require.registerExtension = function() { throw new Error('require.registerExtension() removed. Use ' + 'require.extensions instead.'); }; require.cache = Module._cache; var dirname = path.dirname(filename); if (Module._contextLoad) { if (self.id !== '.') { debug('load submodule'); // not root module var sandbox = {}; for (var k in global) { sandbox[k] = global[k]; } sandbox.require = require; sandbox.exports = self.exports; sandbox.__filename = filename; sandbox.__dirname = dirname; sandbox.module = self; sandbox.global = sandbox; sandbox.root = root; return runInNewContext(content, sandbox, filename, true); } debug('load root module'); // root module global.require = require; global.exports = self.exports; global.__filename = filename; global.__dirname = dirname; global.module = self; return runInThisContext(content, filename, true); } // create wrapper function var wrapper = Module.wrap(content); var compiledWrapper = runInThisContext(wrapper, filename, true); if (global.v8debug) { if (!resolvedArgv) { // we enter the repl if we're not given a filename argument. if (process.argv[1]) { resolvedArgv = Module._resolveFilename(process.argv[1], null); } else { resolvedArgv = 'repl'; } } // Set breakpoint on module start if (filename === resolvedArgv) { global.v8debug.Debug.setBreakPoint(compiledWrapper, 0, 0); } } var args = [self.exports, require, self, filename, dirname]; return compiledWrapper.apply(self.exports, args); };
Module._contextLoad是false,所以有一些代码可以忽略掉,但里面有几个重点:
- 在函数入口处,定义了require函数,require函数还有一些静态的属性、成员函数。最终,require作为调用compiledWrapper.apply()的一个实参。
- var wrapper其实是一个字符串类型的变量,值是 “(function (exports, require, module, __filename, __dirname) { ” + 你的模块代码 + "});",就是上面我贴出来的那包装过的函数。最终,他通过调用runInThisContext()把字符串转成一个真正的函数。
- 最终通过调用被包装好的函数,执行你的模块代码。this指针指向的是self.exports,也就是module.exports,也就是exports。所以在你的模块内部:this === exports === module.exports。
- args是参数,再次重申:之所以你能在模块内部使用require(),是因为他是作为一个参数传入的。
exports其实是一个空的对象,所以exports的初值是空的,可以看这段代码:
function Module(id, parent) { this.id = id; this.exports = {}; this.parent = parent; if (parent && parent.children) { parent.children.push(this); } this.filename = null; this.loaded = false; this.children = []; }
最终是怎么把exports返回出来的呢,是在这个函数里面:
Module._load = function(request, parent, isMain) { if (parent) { debug('Module._load REQUEST ' + (request) + ' parent: ' + parent.id); } var filename = Module._resolveFilename(request, parent); var cachedModule = Module._cache[filename]; if (cachedModule) { return cachedModule.exports; } if (NativeModule.exists(filename)) { // REPL is a special case, because it needs the real require. if (filename == 'repl') { var replModule = new Module('repl'); replModule._compile(NativeModule.getSource('repl'), 'repl.js'); NativeModule._cache.repl = replModule; return replModule.exports; } debug('load native module ' + request); return NativeModule.require(filename); } var module = new Module(filename, parent); if (isMain) { process.mainModule = module; module.id = '.'; } Module._cache[filename] = module; var hadException = true; try { module.load(filename); hadException = false; } finally { if (hadException) { delete Module._cache[filename]; } } return module.exports; };
NodeJs调试一次不容易,把所有的代码,都贴上来(看看还蛮有收获的,有一个stripBOM工具函数,呵呵):
(function (exports, require, module, __filename, __dirname) { // Copyright Joyent, Inc. and other Node contributors. // // Permission is hereby granted, free of charge, to any person obtaining a // copy of this software and associated documentation files (the // "Software"), to deal in the Software without restriction, including // without limitation the rights to use, copy, modify, merge, publish, // distribute, sublicense, and/or sell copies of the Software, and to permit // persons to whom the Software is furnished to do so, subject to the // following conditions: // // The above copyright notice and this permission notice shall be included // in all copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN // NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, // DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR // OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE // USE OR OTHER DEALINGS IN THE SOFTWARE. var NativeModule = require('native_module'); var Script = process.binding('evals').NodeScript; var runInThisContext = Script.runInThisContext; var runInNewContext = Script.runInNewContext; var assert = require('assert').ok; // If obj.hasOwnProperty has been overridden, then calling // obj.hasOwnProperty(prop) will break. // See: https://github.com/joyent/node/issues/1707 function hasOwnProperty(obj, prop) { return Object.prototype.hasOwnProperty.call(obj, prop); } function Module(id, parent) { this.id = id; this.exports = {}; this.parent = parent; if (parent && parent.children) { parent.children.push(this); } this.filename = null; this.loaded = false; this.children = []; } module.exports = Module; // Set the environ variable NODE_MODULE_CONTEXTS=1 to make node load all // modules in thier own context. Module._contextLoad = (+process.env['NODE_MODULE_CONTEXTS'] > 0); Module._cache = {}; Module._pathCache = {}; Module._extensions = {}; var modulePaths = []; Module.globalPaths = []; Module.wrapper = NativeModule.wrapper; Module.wrap = NativeModule.wrap; var path = NativeModule.require('path'); Module._debug = function() {}; if (process.env.NODE_DEBUG && /module/.test(process.env.NODE_DEBUG)) { Module._debug = function(x) { console.error(x); }; } // We use this alias for the preprocessor that filters it out var debug = Module._debug; // given a module name, and a list of paths to test, returns the first // matching file in the following precedence. // // require("a.<ext>") // -> a.<ext> // // require("a") // -> a // -> a.<ext> // -> a/index.<ext> function statPath(path) { var fs = NativeModule.require('fs'); try { return fs.statSync(path); } catch (ex) {} return false; } // check if the directory is a package.json dir var packageCache = {}; function readPackage(requestPath) { if (hasOwnProperty(packageCache, requestPath)) { return packageCache[requestPath]; } var fs = NativeModule.require('fs'); try { var jsonPath = path.resolve(requestPath, 'package.json'); var json = fs.readFileSync(jsonPath, 'utf8'); } catch (e) { return false; } try { var pkg = packageCache[requestPath] = JSON.parse(json); } catch (e) { e.path = jsonPath; e.message = 'Error parsing ' + jsonPath + ': ' + e.message; throw e; } return pkg; } function tryPackage(requestPath, exts) { var pkg = readPackage(requestPath); if (!pkg || !pkg.main) return false; var filename = path.resolve(requestPath, pkg.main); return tryFile(filename) || tryExtensions(filename, exts) || tryExtensions(path.resolve(filename, 'index'), exts); } // In order to minimize unnecessary lstat() calls, // this cache is a list of known-real paths. // Set to an empty object to reset. Module._realpathCache = {}; // check if the file exists and is not a directory function tryFile(requestPath) { var fs = NativeModule.require('fs'); var stats = statPath(requestPath); if (stats && !stats.isDirectory()) { return fs.realpathSync(requestPath, Module._realpathCache); } return false; } // given a path check a the file exists with any of the set extensions function tryExtensions(p, exts) { for (var i = 0, EL = exts.length; i < EL; i++) { var filename = tryFile(p + exts[i]); if (filename) { return filename; } } return false; } Module._findPath = function(request, paths) { var exts = Object.keys(Module._extensions); if (request.charAt(0) === '/') { paths = ['']; } var trailingSlash = (request.slice(-1) === '/'); var cacheKey = JSON.stringify({request: request, paths: paths}); if (Module._pathCache[cacheKey]) { return Module._pathCache[cacheKey]; } // For each path for (var i = 0, PL = paths.length; i < PL; i++) { var basePath = path.resolve(paths[i], request); var filename; if (!trailingSlash) { // try to join the request to the path filename = tryFile(basePath); if (!filename && !trailingSlash) { // try it with each of the extensions filename = tryExtensions(basePath, exts); } } if (!filename) { filename = tryPackage(basePath, exts); } if (!filename) { // try it with each of the extensions at "index" filename = tryExtensions(path.resolve(basePath, 'index'), exts); } if (filename) { Module._pathCache[cacheKey] = filename; return filename; } } return false; }; // 'from' is the __dirname of the module. Module._nodeModulePaths = function(from) { // guarantee that 'from' is absolute. from = path.resolve(from); // note: this approach *only* works when the path is guaranteed // to be absolute. Doing a fully-edge-case-correct path.split // that works on both Windows and Posix is non-trivial. var splitRe = process.platform === 'win32' ? /[/\]/ : ///; // yes, '/' works on both, but let's be a little canonical. var joiner = process.platform === 'win32' ? '\' : '/'; var paths = []; var parts = from.split(splitRe); for (var tip = parts.length - 1; tip >= 0; tip--) { // don't search in .../node_modules/node_modules if (parts[tip] === 'node_modules') continue; var dir = parts.slice(0, tip + 1).concat('node_modules').join(joiner); paths.push(dir); } return paths; }; Module._resolveLookupPaths = function(request, parent) { if (NativeModule.exists(request)) { return [request, []]; } var start = request.substring(0, 2); if (start !== './' && start !== '..') { var paths = modulePaths; if (parent) { if (!parent.paths) parent.paths = []; paths = parent.paths.concat(paths); } return [request, paths]; } // with --eval, parent.id is not set and parent.filename is null if (!parent || !parent.id || !parent.filename) { // make require('./path/to/foo') work - normally the path is taken // from realpath(__filename) but with eval there is no filename var mainPaths = ['.'].concat(modulePaths); mainPaths = Module._nodeModulePaths('.').concat(mainPaths); return [request, mainPaths]; } // Is the parent an index module? // We can assume the parent has a valid extension, // as it already has been accepted as a module. var isIndex = /^index.w+?$/.test(path.basename(parent.filename)); var parentIdPath = isIndex ? parent.id : path.dirname(parent.id); var id = path.resolve(parentIdPath, request); // make sure require('./path') and require('path') get distinct ids, even // when called from the toplevel js file if (parentIdPath === '.' && id.indexOf('/') === -1) { id = './' + id; } debug('RELATIVE: requested:' + request + ' set ID to: ' + id + ' from ' + parent.id); return [id, [path.dirname(parent.filename)]]; }; Module._load = function(request, parent, isMain) { if (parent) { debug('Module._load REQUEST ' + (request) + ' parent: ' + parent.id); } var filename = Module._resolveFilename(request, parent); var cachedModule = Module._cache[filename]; if (cachedModule) { return cachedModule.exports; } if (NativeModule.exists(filename)) { // REPL is a special case, because it needs the real require. if (filename == 'repl') { var replModule = new Module('repl'); replModule._compile(NativeModule.getSource('repl'), 'repl.js'); NativeModule._cache.repl = replModule; return replModule.exports; } debug('load native module ' + request); return NativeModule.require(filename); } var module = new Module(filename, parent); if (isMain) { process.mainModule = module; module.id = '.'; } Module._cache[filename] = module; var hadException = true; try { module.load(filename); hadException = false; } finally { if (hadException) { delete Module._cache[filename]; } } return module.exports; }; Module._resolveFilename = function(request, parent) { if (NativeModule.exists(request)) { return request; } var resolvedModule = Module._resolveLookupPaths(request, parent); var id = resolvedModule[0]; var paths = resolvedModule[1]; // look up the filename first, since that's the cache key. debug('looking for ' + JSON.stringify(id) + ' in ' + JSON.stringify(paths)); var filename = Module._findPath(request, paths); if (!filename) { var err = new Error("Cannot find module '" + request + "'"); err.code = 'MODULE_NOT_FOUND'; throw err; } return filename; }; Module.prototype.load = function(filename) { debug('load ' + JSON.stringify(filename) + ' for module ' + JSON.stringify(this.id)); assert(!this.loaded); this.filename = filename; this.paths = Module._nodeModulePaths(path.dirname(filename)); var extension = path.extname(filename) || '.js'; if (!Module._extensions[extension]) extension = '.js'; Module._extensions[extension](this, filename); this.loaded = true; }; Module.prototype.require = function(path) { assert(typeof path === 'string', 'path must be a string'); assert(path, 'missing path'); return Module._load(path, this); }; // Resolved path to process.argv[1] will be lazily placed here // (needed for setting breakpoint when called with --debug-brk) var resolvedArgv; // Returns exception if any Module.prototype._compile = function(content, filename) { var self = this; // remove shebang content = content.replace(/^#!.*/, ''); function require(path) { return self.require(path); } require.resolve = function(request) { return Module._resolveFilename(request, self); }; Object.defineProperty(require, 'paths', { get: function() { throw new Error('require.paths is removed. Use ' + 'node_modules folders, or the NODE_PATH ' + 'environment variable instead.'); }}); require.main = process.mainModule; // Enable support to add extra extension types require.extensions = Module._extensions; require.registerExtension = function() { throw new Error('require.registerExtension() removed. Use ' + 'require.extensions instead.'); }; require.cache = Module._cache; var dirname = path.dirname(filename); if (Module._contextLoad) { if (self.id !== '.') { debug('load submodule'); // not root module var sandbox = {}; for (var k in global) { sandbox[k] = global[k]; } sandbox.require = require; sandbox.exports = self.exports; sandbox.__filename = filename; sandbox.__dirname = dirname; sandbox.module = self; sandbox.global = sandbox; sandbox.root = root; return runInNewContext(content, sandbox, filename, true); } debug('load root module'); // root module global.require = require; global.exports = self.exports; global.__filename = filename; global.__dirname = dirname; global.module = self; return runInThisContext(content, filename, true); } // create wrapper function var wrapper = Module.wrap(content); var compiledWrapper = runInThisContext(wrapper, filename, true); if (global.v8debug) { if (!resolvedArgv) { // we enter the repl if we're not given a filename argument. if (process.argv[1]) { resolvedArgv = Module._resolveFilename(process.argv[1], null); } else { resolvedArgv = 'repl'; } } // Set breakpoint on module start if (filename === resolvedArgv) { global.v8debug.Debug.setBreakPoint(compiledWrapper, 0, 0); } } var args = [self.exports, require, self, filename, dirname]; return compiledWrapper.apply(self.exports, args); }; function stripBOM(content) { // Remove byte order marker. This catches EF BB BF (the UTF-8 BOM) // because the buffer-to-string conversion in `fs.readFileSync()` // translates it to FEFF, the UTF-16 BOM. if (content.charCodeAt(0) === 0xFEFF) { content = content.slice(1); } return content; } // Native extension for .js Module._extensions['.js'] = function(module, filename) { var content = NativeModule.require('fs').readFileSync(filename, 'utf8'); module._compile(stripBOM(content), filename); }; // Native extension for .json Module._extensions['.json'] = function(module, filename) { var content = NativeModule.require('fs').readFileSync(filename, 'utf8'); try { module.exports = JSON.parse(stripBOM(content)); } catch (err) { err.message = filename + ': ' + err.message; throw err; } }; //Native extension for .node Module._extensions['.node'] = process.dlopen; // bootstrap main module. Module.runMain = function() { // Load the main module--the command line argument. Module._load(process.argv[1], null, true); // Handle any nextTicks added in the first tick of the program process._tickCallback(); }; Module._initPaths = function() { var isWindows = process.platform === 'win32'; if (isWindows) { var homeDir = process.env.USERPROFILE; } else { var homeDir = process.env.HOME; } var paths = [path.resolve(process.execPath, '..', '..', 'lib', 'node')]; if (homeDir) { paths.unshift(path.resolve(homeDir, '.node_libraries')); paths.unshift(path.resolve(homeDir, '.node_modules')); } if (process.env['NODE_PATH']) { var splitter = isWindows ? ';' : ':'; paths = process.env['NODE_PATH'].split(splitter).concat(paths); } modulePaths = paths; // clone as a read-only copy, for introspection. Module.globalPaths = modulePaths.slice(0); }; // bootstrap repl Module.requireRepl = function() { return Module._load('repl', '.'); }; Module._initPaths(); // backwards compatibility Module.Module = Module; });
References:
- runInThisContext: http://www.cnblogs.com/rubylouvre/archive/2011/11/25/2262521.html
- Script.runInThisContext 以及nodejs的require: http://andy0807.iteye.com/blog/1446769
- 错误的或者过时的关于MooTools污染或防止污染全局作用域的文章,可以看出MooTools污染全局作用域可能是故意为之:http://davidwalsh.name/mootools-nodejs