diff options
Diffstat (limited to 'node_modules/string_decoder/lib')
-rw-r--r-- | node_modules/string_decoder/lib/string_decoder.js | 296 |
1 files changed, 296 insertions, 0 deletions
diff --git a/node_modules/string_decoder/lib/string_decoder.js b/node_modules/string_decoder/lib/string_decoder.js new file mode 100644 index 0000000..2e89e63 --- /dev/null +++ b/node_modules/string_decoder/lib/string_decoder.js @@ -0,0 +1,296 @@ +// Copyright Joyent, Inc. and other Node contributors. +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the +// "Software"), to deal in the Software without restriction, including +// without limitation the rights to use, copy, modify, merge, publish, +// distribute, sublicense, and/or sell copies of the Software, and to permit +// persons to whom the Software is furnished to do so, subject to the +// following conditions: +// +// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN +// NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +// OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE +// USE OR OTHER DEALINGS IN THE SOFTWARE. + +'use strict'; + +/*<replacement>*/ + +var Buffer = require('safe-buffer').Buffer; +/*</replacement>*/ + +var isEncoding = Buffer.isEncoding || function (encoding) { + encoding = '' + encoding; + switch (encoding && encoding.toLowerCase()) { + case 'hex':case 'utf8':case 'utf-8':case 'ascii':case 'binary':case 'base64':case 'ucs2':case 'ucs-2':case 'utf16le':case 'utf-16le':case 'raw': + return true; + default: + return false; + } +}; + +function _normalizeEncoding(enc) { + if (!enc) return 'utf8'; + var retried; + while (true) { + switch (enc) { + case 'utf8': + case 'utf-8': + return 'utf8'; + case 'ucs2': + case 'ucs-2': + case 'utf16le': + case 'utf-16le': + return 'utf16le'; + case 'latin1': + case 'binary': + return 'latin1'; + case 'base64': + case 'ascii': + case 'hex': + return enc; + default: + if (retried) return; // undefined + enc = ('' + enc).toLowerCase(); + retried = true; + } + } +}; + +// Do not cache `Buffer.isEncoding` when checking encoding names as some +// modules monkey-patch it to support additional encodings +function normalizeEncoding(enc) { + var nenc = _normalizeEncoding(enc); + if (typeof nenc !== 'string' && (Buffer.isEncoding === isEncoding || !isEncoding(enc))) throw new Error('Unknown encoding: ' + enc); + return nenc || enc; +} + +// StringDecoder provides an interface for efficiently splitting a series of +// buffers into a series of JS strings without breaking apart multi-byte +// characters. +exports.StringDecoder = StringDecoder; +function StringDecoder(encoding) { + this.encoding = normalizeEncoding(encoding); + var nb; + switch (this.encoding) { + case 'utf16le': + this.text = utf16Text; + this.end = utf16End; + nb = 4; + break; + case 'utf8': + this.fillLast = utf8FillLast; + nb = 4; + break; + case 'base64': + this.text = base64Text; + this.end = base64End; + nb = 3; + break; + default: + this.write = simpleWrite; + this.end = simpleEnd; + return; + } + this.lastNeed = 0; + this.lastTotal = 0; + this.lastChar = Buffer.allocUnsafe(nb); +} + +StringDecoder.prototype.write = function (buf) { + if (buf.length === 0) return ''; + var r; + var i; + if (this.lastNeed) { + r = this.fillLast(buf); + if (r === undefined) return ''; + i = this.lastNeed; + this.lastNeed = 0; + } else { + i = 0; + } + if (i < buf.length) return r ? r + this.text(buf, i) : this.text(buf, i); + return r || ''; +}; + +StringDecoder.prototype.end = utf8End; + +// Returns only complete characters in a Buffer +StringDecoder.prototype.text = utf8Text; + +// Attempts to complete a partial non-UTF-8 character using bytes from a Buffer +StringDecoder.prototype.fillLast = function (buf) { + if (this.lastNeed <= buf.length) { + buf.copy(this.lastChar, this.lastTotal - this.lastNeed, 0, this.lastNeed); + return this.lastChar.toString(this.encoding, 0, this.lastTotal); + } + buf.copy(this.lastChar, this.lastTotal - this.lastNeed, 0, buf.length); + this.lastNeed -= buf.length; +}; + +// Checks the type of a UTF-8 byte, whether it's ASCII, a leading byte, or a +// continuation byte. If an invalid byte is detected, -2 is returned. +function utf8CheckByte(byte) { + if (byte <= 0x7F) return 0;else if (byte >> 5 === 0x06) return 2;else if (byte >> 4 === 0x0E) return 3;else if (byte >> 3 === 0x1E) return 4; + return byte >> 6 === 0x02 ? -1 : -2; +} + +// Checks at most 3 bytes at the end of a Buffer in order to detect an +// incomplete multi-byte UTF-8 character. The total number of bytes (2, 3, or 4) +// needed to complete the UTF-8 character (if applicable) are returned. +function utf8CheckIncomplete(self, buf, i) { + var j = buf.length - 1; + if (j < i) return 0; + var nb = utf8CheckByte(buf[j]); + if (nb >= 0) { + if (nb > 0) self.lastNeed = nb - 1; + return nb; + } + if (--j < i || nb === -2) return 0; + nb = utf8CheckByte(buf[j]); + if (nb >= 0) { + if (nb > 0) self.lastNeed = nb - 2; + return nb; + } + if (--j < i || nb === -2) return 0; + nb = utf8CheckByte(buf[j]); + if (nb >= 0) { + if (nb > 0) { + if (nb === 2) nb = 0;else self.lastNeed = nb - 3; + } + return nb; + } + return 0; +} + +// Validates as many continuation bytes for a multi-byte UTF-8 character as +// needed or are available. If we see a non-continuation byte where we expect +// one, we "replace" the validated continuation bytes we've seen so far with +// a single UTF-8 replacement character ('\ufffd'), to match v8's UTF-8 decoding +// behavior. The continuation byte check is included three times in the case +// where all of the continuation bytes for a character exist in the same buffer. +// It is also done this way as a slight performance increase instead of using a +// loop. +function utf8CheckExtraBytes(self, buf, p) { + if ((buf[0] & 0xC0) !== 0x80) { + self.lastNeed = 0; + return '\ufffd'; + } + if (self.lastNeed > 1 && buf.length > 1) { + if ((buf[1] & 0xC0) !== 0x80) { + self.lastNeed = 1; + return '\ufffd'; + } + if (self.lastNeed > 2 && buf.length > 2) { + if ((buf[2] & 0xC0) !== 0x80) { + self.lastNeed = 2; + return '\ufffd'; + } + } + } +} + +// Attempts to complete a multi-byte UTF-8 character using bytes from a Buffer. +function utf8FillLast(buf) { + var p = this.lastTotal - this.lastNeed; + var r = utf8CheckExtraBytes(this, buf, p); + if (r !== undefined) return r; + if (this.lastNeed <= buf.length) { + buf.copy(this.lastChar, p, 0, this.lastNeed); + return this.lastChar.toString(this.encoding, 0, this.lastTotal); + } + buf.copy(this.lastChar, p, 0, buf.length); + this.lastNeed -= buf.length; +} + +// Returns all complete UTF-8 characters in a Buffer. If the Buffer ended on a +// partial character, the character's bytes are buffered until the required +// number of bytes are available. +function utf8Text(buf, i) { + var total = utf8CheckIncomplete(this, buf, i); + if (!this.lastNeed) return buf.toString('utf8', i); + this.lastTotal = total; + var end = buf.length - (total - this.lastNeed); + buf.copy(this.lastChar, 0, end); + return buf.toString('utf8', i, end); +} + +// For UTF-8, a replacement character is added when ending on a partial +// character. +function utf8End(buf) { + var r = buf && buf.length ? this.write(buf) : ''; + if (this.lastNeed) return r + '\ufffd'; + return r; +} + +// UTF-16LE typically needs two bytes per character, but even if we have an even +// number of bytes available, we need to check if we end on a leading/high +// surrogate. In that case, we need to wait for the next two bytes in order to +// decode the last character properly. +function utf16Text(buf, i) { + if ((buf.length - i) % 2 === 0) { + var r = buf.toString('utf16le', i); + if (r) { + var c = r.charCodeAt(r.length - 1); + if (c >= 0xD800 && c <= 0xDBFF) { + this.lastNeed = 2; + this.lastTotal = 4; + this.lastChar[0] = buf[buf.length - 2]; + this.lastChar[1] = buf[buf.length - 1]; + return r.slice(0, -1); + } + } + return r; + } + this.lastNeed = 1; + this.lastTotal = 2; + this.lastChar[0] = buf[buf.length - 1]; + return buf.toString('utf16le', i, buf.length - 1); +} + +// For UTF-16LE we do not explicitly append special replacement characters if we +// end on a partial character, we simply let v8 handle that. +function utf16End(buf) { + var r = buf && buf.length ? this.write(buf) : ''; + if (this.lastNeed) { + var end = this.lastTotal - this.lastNeed; + return r + this.lastChar.toString('utf16le', 0, end); + } + return r; +} + +function base64Text(buf, i) { + var n = (buf.length - i) % 3; + if (n === 0) return buf.toString('base64', i); + this.lastNeed = 3 - n; + this.lastTotal = 3; + if (n === 1) { + this.lastChar[0] = buf[buf.length - 1]; + } else { + this.lastChar[0] = buf[buf.length - 2]; + this.lastChar[1] = buf[buf.length - 1]; + } + return buf.toString('base64', i, buf.length - n); +} + +function base64End(buf) { + var r = buf && buf.length ? this.write(buf) : ''; + if (this.lastNeed) return r + this.lastChar.toString('base64', 0, 3 - this.lastNeed); + return r; +} + +// Pass bytes on through for single-byte encodings (e.g. ascii, latin1, hex) +function simpleWrite(buf) { + return buf.toString(this.encoding); +} + +function simpleEnd(buf) { + return buf && buf.length ? this.write(buf) : ''; +}
\ No newline at end of file |