|
|
/*! https://mths.be/regenerate v1.4.2 by @mathias | MIT license */ ;(function(root) {
// Environment detection. Each `typeof` guard avoids a ReferenceError when the
// identifier is not declared in the current environment; every variable below
// ends up holding either the detected object or a falsy value.
// Detect the free variable `exports`.
var freeExports = typeof exports == 'object' && exports;
// Detect the free variable `module`. It is only accepted when its `exports`
// property is the very same value as the detected `exports`.
var freeModule = typeof module == 'object' && module && module.exports == freeExports && module;
// Detect free variable `global`, from Node.js/io.js or Browserified code,
// and use it as `root`. `root` is only replaced when the candidate refers to
// itself through its own `global` or `window` property.
var freeGlobal = typeof global == 'object' && global; if (freeGlobal.global === freeGlobal || freeGlobal.window === freeGlobal) { root = freeGlobal; }
/*--------------------------------------------------------------------------*/
// Messages for the errors thrown when a range is reversed (`rangeOrder`) or a
// value lies outside the Unicode code point space (`codePointRange`).
var ERRORS = {
	'rangeOrder': 'A range\u2019s `stop` value must be greater than or equal to the `start` value.',
	'codePointRange': 'Invalid code point value. Code points range from U+000000 to U+10FFFF.'
};
// Inclusive bounds of the UTF-16 high (lead) and low (trail) surrogate ranges.
// https://mathiasbynens.be/notes/javascript-encoding#surrogate-pairs
var HIGH_SURROGATE_MIN = 0xD800;
var HIGH_SURROGATE_MAX = 0xDBFF;
var LOW_SURROGATE_MIN = 0xDC00;
var LOW_SURROGATE_MAX = 0xDFFF;
// Matches the literal text `\x00` when it is followed by a non-digit character
// or by the end of the string; the following character (if any) is captured so
// that `toString` can re-insert it when rewriting the match to `\0`.
// The digit exclusion is presumably there because `\0` directly followed by a
// decimal digit would no longer read as a NUL escape.
// In Regenerate output, `\0` is never preceded by `\` because we sort by
// code point value, so the expression does not need to check for a preceding
// backslash.
var regexNull = /\\x00([^0123456789]|$)/g;
var object = {};
var hasOwnProperty = object.hasOwnProperty;
// Copies every own enumerable property of `source` onto `destination`
// (inherited properties are skipped) and returns `destination`.
var extend = function(destination, source) {
	for (var name in source) {
		if (!hasOwnProperty.call(source, name)) {
			continue;
		}
		destination[name] = source[name];
	}
	return destination;
};
// Calls `callback(item, index)` for each index from 0 up to the length the
// array had when iteration started.
var forEach = function(array, callback) {
	var count = array.length;
	for (var position = 0; position < count; position++) {
		callback(array[position], position);
	}
};
var toString = object.toString;
// Reports whether `value` is an array, judged by its internal class tag.
var isArray = function(value) {
	var tag = toString.call(value);
	return tag == '[object Array]';
};
// Reports whether `value` is a number primitive or a `Number` wrapper object.
var isNumber = function(value) {
	if (typeof value == 'number') {
		return true;
	}
	return toString.call(value) == '[object Number]';
};
// Left-pads the string form of `number` with zeroes up to `totalCharacters`.
// Values that are already long enough are returned unchanged (as a string).
// Only up to four zeroes are available, which is sufficient for the 2- and
// 4-digit escapes produced in this file.
var zeroes = '0000';
var pad = function(number, totalCharacters) {
	var text = String(number);
	if (text.length < totalCharacters) {
		return (zeroes + text).slice(-totalCharacters);
	}
	return text;
};
// Converts `number` (anything `Number()` accepts) to upper-case hexadecimal.
var hex = function(number) {
	var hexadecimal = Number(number).toString(16);
	return hexadecimal.toUpperCase();
};
// Shared reference to the native array `slice`, used to turn `arguments`
// objects into real arrays.
var slice = Array.prototype.slice;
/*--------------------------------------------------------------------------*/
// Converts a list of code points into range data: a flat array of
// `(start, end)` pairs in which `end` is exclusive. Runs of consecutive values
// are collapsed into a single pair. The list is processed in the order given.
var dataFromCodePoints = function(codePoints) {
	var total = codePoints.length;
	var ranges = [];
	if (!total) {
		return ranges;
	}
	// Open the first range.
	var last = codePoints[0];
	ranges.push(last);
	for (var position = 1; position < total; position++) {
		var current = codePoints[position];
		if (current != last + 1) {
			// Not consecutive: close the open range and start a new one.
			ranges.push(last + 1, current);
		}
		last = current;
	}
	// Close the range that is still open.
	ranges.push(codePoints[total - 1] + 1);
	return ranges;
};
// Removes a single code point from `data` (flat `(start, end)` pairs with an
// exclusive `end`). The array is modified in place and returned. Nothing
// happens when the code point is not part of any pair.
var dataRemove = function(data, codePoint) {
	var total = data.length;
	for (var position = 0; position < total; position += 2) {
		var first = data[position];
		var limit = data[position + 1];
		if (!(codePoint >= first && codePoint < limit)) {
			continue;
		}
		if (codePoint == first) {
			if (limit == first + 1) {
				// The pair only held this code point; drop the whole pair.
				data.splice(position, 2);
			} else {
				// Shrink the pair from the front.
				data[position] = codePoint + 1;
			}
		} else if (codePoint == limit - 1) {
			// Shrink the pair from the back.
			data[position + 1] = codePoint;
		} else {
			// Punch a hole: one pair becomes two.
			data.splice(position, 2, first, codePoint, codePoint + 1, limit);
		}
		return data;
	}
	return data;
};
// Removes the inclusive range `rangeStart`-`rangeEnd` from `data` (flat
// `(start, end)` pairs with an exclusive `end`). The array is modified in
// place and returned. Throws an `Error` when `rangeEnd < rangeStart`.
var dataRemoveRange = function(data, rangeStart, rangeEnd) {
	if (rangeEnd < rangeStart) {
		throw Error(ERRORS.rangeOrder);
	}
	var position = 0;
	// `data.length` is re-read on every pass because pairs may be spliced out.
	while (position < data.length) {
		var first = data[position];
		var last = data[position + 1] - 1; // Inclusive end of this pair.

		// Every remaining pair lies beyond the range; nothing left to do.
		if (first > rangeEnd) {
			break;
		}

		// The pair is completely covered by the range: drop it and look at
		// whatever moved into its place.
		// E.g. `[0, 11, 40, 51]` minus 0-10 → `[40, 51]`.
		if (rangeStart <= first && rangeEnd >= last) {
			data.splice(position, 2);
			continue;
		}

		// The range sits strictly inside the pair (it ends before the pair
		// does). E.g. `[0, 11]` minus 4-6 → `[0, 4, 7, 11]`.
		if (rangeStart >= first && rangeEnd < last) {
			if (rangeStart == first) {
				// Only the head of the pair is cut off.
				data[position] = rangeEnd + 1;
				data[position + 1] = last + 1;
			} else {
				data.splice(position, 2, first, rangeStart, rangeEnd + 1, last + 1);
			}
			return data;
		}

		if (rangeStart >= first && rangeStart <= last) {
			// Only the start of the range falls inside this pair: cut off its
			// tail and keep going, since later pairs may overlap as well.
			// E.g. `[0, 11, 14, 31]` minus 4-20 → `[0, 4, 21, 31]`.
			data[position + 1] = rangeStart;
		} else if (rangeEnd >= first && rangeEnd <= last) {
			// Only the end of the range falls inside this pair: cut off its
			// head. E.g. `[14, 31]` minus 4-20 → `[21, 31]`.
			data[position] = rangeEnd + 1;
			return data;
		}

		position += 2;
	}
	return data;
};
// Adds a single code point to `data` (flat `(start, end)` pairs with an
// exclusive `end`). The array is modified in place and returned. Throws a
// `RangeError` when the code point is below 0 or above 0x10FFFF.
var dataAdd = function(data, codePoint) {
	var total = data.length;
	if (codePoint < 0x0 || codePoint > 0x10FFFF) {
		throw RangeError(ERRORS.codePointRange);
	}
	for (var position = 0; position < total; position += 2) {
		var first = data[position];
		var limit = data[position + 1];

		// Already part of the set.
		if (codePoint >= first && codePoint < limit) {
			return data;
		}

		// Directly in front of this pair: grow the pair downwards.
		if (codePoint == first - 1) {
			data[position] = codePoint;
			return data;
		}

		// This pair starts beyond the code point, so the code point belongs in
		// a new pair of its own right before it.
		if (first > codePoint) {
			data.splice(position, 0, codePoint, codePoint + 1);
			return data;
		}

		// Directly behind this pair: grow the pair upwards.
		if (codePoint == limit) {
			if (codePoint + 1 == data[position + 2]) {
				// The code point closes the gap to the next pair, e.g.
				// `dataAdd([0, 4, 5, 10], 4)` → `[0, 10]`.
				data.splice(position, 4, first, data[position + 3]);
			} else {
				data[position + 1] = codePoint + 1;
			}
			return data;
		}
	}
	// No pair is near the code point; append it as a new pair.
	data.push(codePoint, codePoint + 1);
	return data;
};
// Returns a copy of `dataA` to which every `(start, end)` pair of `dataB` has
// been added. `dataA` itself is left untouched.
var dataAddData = function(dataA, dataB) {
	var merged = dataA.slice();
	var total = dataB.length;
	for (var position = 0; position < total; position += 2) {
		var first = dataB[position];
		var last = dataB[position + 1] - 1; // Inclusive end of the pair.
		merged = first == last ?
			dataAdd(merged, first) :
			dataAddRange(merged, first, last);
	}
	return merged;
};
// Returns a copy of `dataA` from which every `(start, end)` pair of `dataB`
// has been removed. `dataA` itself is left untouched.
var dataRemoveData = function(dataA, dataB) {
	var remaining = dataA.slice();
	var total = dataB.length;
	for (var position = 0; position < total; position += 2) {
		var first = dataB[position];
		var last = dataB[position + 1] - 1; // Inclusive end of the pair.
		remaining = first == last ?
			dataRemove(remaining, first) :
			dataRemoveRange(remaining, first, last);
	}
	return remaining;
};
// Adds the inclusive range `rangeStart`-`rangeEnd` to `data` (flat, ascending
// `(start, end)` pairs with an exclusive `end`). The array is modified in
// place and returned.
//
// Every existing pair that overlaps the new range, or touches it without a
// gap, is merged with it into one pair. This also covers a new range that
// begins before an existing pair and ends inside it (e.g. `[5, 10]` plus 3-7
// → `[3, 10]`).
//
// Throws an `Error` when `rangeEnd < rangeStart`, and a `RangeError` when
// either bound lies outside 0x0-0x10FFFF.
var dataAddRange = function(data, rangeStart, rangeEnd) {
	if (rangeEnd < rangeStart) {
		throw Error(ERRORS.rangeOrder);
	}
	if (
		rangeStart < 0x0 || rangeStart > 0x10FFFF ||
		rangeEnd < 0x0 || rangeEnd > 0x10FFFF
	) {
		throw RangeError(ERRORS.codePointRange);
	}

	var length = data.length;
	var mergedStart = rangeStart;
	var mergedEnd = rangeEnd + 1; // Exclusive, like the stored pairs.

	// Skip the pairs that end before the new range starts and do not even
	// touch it. (A pair whose exclusive `end` equals `rangeStart` is adjacent,
	// e.g. `[0, 3]` plus 3-6 → `[0, 7]`, so it is not skipped.)
	var first = 0;
	while (first < length && data[first + 1] < rangeStart) {
		first += 2;
	}

	// Absorb every following pair that starts inside the new range or directly
	// after it, widening the merged pair where an absorbed pair sticks out.
	var next = first;
	while (next < length && data[next] <= rangeEnd + 1) {
		if (data[next] < mergedStart) {
			mergedStart = data[next];
		}
		if (data[next + 1] > mergedEnd) {
			mergedEnd = data[next + 1];
		}
		next += 2;
	}

	// Replace the absorbed pairs (possibly none) with the merged pair.
	data.splice(first, next - first, mergedStart, mergedEnd);
	return data;
};
// Reports whether `codePoint` falls inside any `(start, end)` pair of `data`
// (`end` is exclusive).
var dataContains = function(data, codePoint) {
	var total = data.length;
	// Quick rejection: the value lies outside the span from the first stored
	// start to the last stored end.
	if (total >= 2 && (codePoint < data[0] || codePoint > data[total - 1])) {
		return false;
	}
	for (var position = 0; position < total; position += 2) {
		if (codePoint >= data[position] && codePoint < data[position + 1]) {
			return true;
		}
	}
	return false;
};
// Returns new range data holding the values of `codePoints` that are also
// contained in `data`.
//
// `codePoints` may be in any order and may contain duplicates: the matching
// values are sorted and de-duplicated before they are turned into ranges,
// because `dataFromCodePoints` processes its input in the order given.
// Neither `data` nor `codePoints` is modified.
var dataIntersection = function(data, codePoints) {
	var length = codePoints.length;
	var matches = [];
	var index;
	var codePoint;
	for (index = 0; index < length; index++) {
		codePoint = codePoints[index];
		if (dataContains(data, codePoint)) {
			matches.push(codePoint);
		}
	}
	// Numeric sort; the default comparator would sort lexicographically.
	matches.sort(function(a, b) {
		return a - b;
	});
	var unique = [];
	for (index = 0; index < matches.length; index++) {
		if (index == 0 || matches[index] - matches[index - 1] !== 0) {
			unique.push(matches[index]);
		}
	}
	return dataFromCodePoints(unique);
};
// Reports whether `data` holds no pairs at all.
var dataIsEmpty = function(data) {
	return data.length ? false : true;
};
// Reports whether `data` consists of exactly one pair that covers exactly one
// code point.
var dataIsSingleton = function(data) {
	if (data.length != 2) {
		return false;
	}
	return data[0] + 1 == data[1];
};
// Expands `data` into a flat array listing every individual code point of
// every `(start, end)` pair (`end` is exclusive), in stored order.
var dataToArray = function(data) {
	var codePoints = [];
	var total = data.length;
	for (var position = 0; position < total; position += 2) {
		var limit = data[position + 1];
		for (var codePoint = data[position]; codePoint < limit; ++codePoint) {
			codePoints.push(codePoint);
		}
	}
	return codePoints;
};
/*--------------------------------------------------------------------------*/
// https://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
var floor = Math.floor;
// Computes the high (lead) surrogate code unit for the given code point using
// the standard surrogate formula.
var highSurrogate = function(codePoint) {
	var offset = codePoint - 0x10000;
	var lead = floor(offset / 0x400) + HIGH_SURROGATE_MIN;
	return parseInt(lead, 10);
};
// Computes the low (trail) surrogate code unit for the given code point using
// the standard surrogate formula.
var lowSurrogate = function(codePoint) {
	var offset = codePoint - 0x10000;
	var trail = offset % 0x400 + LOW_SURROGATE_MIN;
	return parseInt(trail, 10);
};
var stringFromCharCode = String.fromCharCode;
// Returns the regular-expression source text for a single BMP code point.
// Only values up to 0xFFFF are expected; astral code points are written as
// surrogate pairs or `\u{…}` escapes by the callers.
var codePointToString = function(codePoint) {
	// Single-character escapes.
	// https://mathiasbynens.be/notes/javascript-escapes#single
	// `\b` is deliberately absent: inside a regular expression it means
	// "word boundary" rather than U+0008. `\v` is absent too, because
	// IE < 9 treats `'\v'` as `'v'`.
	if (codePoint == 0x09) {
		return '\\t';
	}
	if (codePoint == 0x0A) {
		return '\\n';
	}
	if (codePoint == 0x0C) {
		return '\\f';
	}
	if (codePoint == 0x0D) {
		return '\\r';
	}
	// `-` (U+002D HYPHEN-MINUS) gets a hexadecimal escape instead of a
	// backslash, in case the output is used outside of a character class in a
	// `u` RegExp: /\-/u throws, but /\x2D/u is fine.
	// https://mathiasbynens.be/notes/javascript-escapes#hexadecimal
	if (codePoint == 0x2D) {
		return '\\x2D';
	}
	if (codePoint == 0x5C) {
		return '\\\\';
	}
	// Printable ASCII symbols that are unsafe when used verbatim:
	//
	//     $()*+./?[\]^{|}
	//
	// i.e. the SyntaxCharacters plus `/` (U+002F SOLIDUS); the backslash
	// itself was handled above.
	// https://tc39.github.io/ecma262/#prod-SyntaxCharacter
	var needsBackslash =
		codePoint == 0x24 ||
		(codePoint >= 0x28 && codePoint <= 0x2B) ||
		codePoint == 0x2E ||
		codePoint == 0x2F ||
		codePoint == 0x3F ||
		(codePoint >= 0x5B && codePoint <= 0x5E) ||
		(codePoint >= 0x7B && codePoint <= 0x7D);
	if (needsBackslash) {
		return '\\' + stringFromCharCode(codePoint);
	}
	// Every other printable ASCII symbol (space included) is used as-is.
	if (codePoint >= 0x20 && codePoint <= 0x7E) {
		return stringFromCharCode(codePoint);
	}
	// Remaining values up to 0xFF: two-digit hexadecimal escape.
	if (codePoint <= 0xFF) {
		return '\\x' + pad(hex(codePoint), 2);
	}
	// Everything else: four-digit Unicode escape.
	// https://mathiasbynens.be/notes/javascript-escapes#unicode
	return '\\u' + pad(hex(codePoint), 4);
};
// Like `codePointToString`, but for patterns that use the `u` flag: values
// above 0xFFFF are written as a `\u{…}` code point escape.
var codePointToStringUnicode = function(codePoint) {
	var fitsInBMP = codePoint <= 0xFFFF;
	if (fitsInBMP) {
		return codePointToString(codePoint);
	}
	var hexadecimal = codePoint.toString(16).toUpperCase();
	return '\\u{' + hexadecimal + '}';
};
// Returns the code point of the first symbol in the string `symbol`. When the
// first code unit is a high surrogate and a second code unit exists, the two
// are combined into an astral code point; the second unit is assumed to be a
// low surrogate (anything else is invalid usage of Regenerate anyway).
var symbolToCodePoint = function(symbol) {
	var unitCount = symbol.length;
	var lead = symbol.charCodeAt(0);
	var startsPair =
		lead >= HIGH_SURROGATE_MIN &&
		lead <= HIGH_SURROGATE_MAX &&
		unitCount > 1;
	if (!startsPair) {
		return lead;
	}
	var trail = symbol.charCodeAt(1);
	// https://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
	return (lead - HIGH_SURROGATE_MIN) * 0x400 + trail - LOW_SURROGATE_MIN + 0x10000;
};
// Builds regular-expression source for `data` using BMP escapes. A set with a
// single code point is returned as a bare symbol; anything else is wrapped in
// a `[...]` character class.
var createBMPCharacterClasses = function(data) {
	if (dataIsSingleton(data)) {
		return codePointToString(data[0]);
	}
	var body = '';
	var total = data.length;
	for (var position = 0; position < total; position += 2) {
		var first = data[position];
		var last = data[position + 1] - 1; // Inclusive end of the pair.
		var piece = codePointToString(first);
		if (first != last) {
			// Two neighbouring symbols are simply listed; longer runs are
			// written with a hyphen.
			if (first + 1 != last) {
				piece += '-';
			}
			piece += codePointToString(last);
		}
		body += piece;
	}
	return '[' + body + ']';
};
// Builds regular-expression source for `data` for use with the `u` flag. A
// set with a single code point is returned as a bare symbol; anything else is
// wrapped in a `[...]` character class.
var createUnicodeCharacterClasses = function(data) {
	if (dataIsSingleton(data)) {
		return codePointToStringUnicode(data[0]);
	}
	var body = '';
	var total = data.length;
	for (var position = 0; position < total; position += 2) {
		var first = data[position];
		var last = data[position + 1] - 1; // Inclusive end of the pair.
		var piece = codePointToStringUnicode(first);
		if (first != last) {
			// Two neighbouring symbols are simply listed; longer runs are
			// written with a hyphen.
			if (first + 1 != last) {
				piece += '-';
			}
			piece += codePointToStringUnicode(last);
		}
		body += piece;
	}
	return '[' + body + ']';
};
// Splits `data` into four separate sets of `(start, end)` pairs:
//
// - `loneHighSurrogates`: code points within U+D800-U+DBFF
// - `loneLowSurrogates`: code points within U+DC00-U+DFFF
// - `bmp`: all other code points up to U+FFFF
// - `astral`: code points above U+FFFF
//
// A pair that crosses one or more of these boundaries is cut at each of them.
var splitAtBMP = function(data) {
	var loneHighSurrogates = [];
	var loneLowSurrogates = [];
	var bmp = [];
	var astral = [];

	// Files `from`-`last` (inclusive), where `from` lies above the low
	// surrogates but within the BMP, cutting at U+FFFF when needed.
	var pushAboveSurrogates = function(from, last) {
		if (last <= 0xFFFF) {
			bmp.push(from, last + 1);
			return;
		}
		bmp.push(from, 0xFFFF + 1);
		astral.push(0xFFFF + 1, last + 1);
	};

	var total = data.length;
	for (var position = 0; position < total; position += 2) {
		var first = data[position];
		var last = data[position + 1] - 1; // Inclusive end of the pair.

		if (first < HIGH_SURROGATE_MIN) {
			if (last < HIGH_SURROGATE_MIN) {
				// Starts and ends before the high surrogates. E.g. (0, 0x10).
				bmp.push(first, last + 1);
			} else if (last >= HIGH_SURROGATE_MIN && last <= HIGH_SURROGATE_MAX) {
				// Ends within the high surrogates. E.g. (0, 0xD855).
				bmp.push(first, HIGH_SURROGATE_MIN);
				loneHighSurrogates.push(HIGH_SURROGATE_MIN, last + 1);
			} else if (last >= LOW_SURROGATE_MIN && last <= LOW_SURROGATE_MAX) {
				// Ends within the low surrogates. E.g. (0, 0xDCFF).
				bmp.push(first, HIGH_SURROGATE_MIN);
				loneHighSurrogates.push(HIGH_SURROGATE_MIN, HIGH_SURROGATE_MAX + 1);
				loneLowSurrogates.push(LOW_SURROGATE_MIN, last + 1);
			} else if (last > LOW_SURROGATE_MAX) {
				// Ends after the low surrogates. E.g. (0, 0x10FFFF).
				bmp.push(first, HIGH_SURROGATE_MIN);
				loneHighSurrogates.push(HIGH_SURROGATE_MIN, HIGH_SURROGATE_MAX + 1);
				loneLowSurrogates.push(LOW_SURROGATE_MIN, LOW_SURROGATE_MAX + 1);
				pushAboveSurrogates(LOW_SURROGATE_MAX + 1, last);
			}
		} else if (first >= HIGH_SURROGATE_MIN && first <= HIGH_SURROGATE_MAX) {
			if (last >= HIGH_SURROGATE_MIN && last <= HIGH_SURROGATE_MAX) {
				// Starts and ends within the high surrogates.
				// E.g. (0xD855, 0xD866).
				loneHighSurrogates.push(first, last + 1);
			} else if (last >= LOW_SURROGATE_MIN && last <= LOW_SURROGATE_MAX) {
				// Ends within the low surrogates. E.g. (0xD855, 0xDCFF).
				loneHighSurrogates.push(first, HIGH_SURROGATE_MAX + 1);
				loneLowSurrogates.push(LOW_SURROGATE_MIN, last + 1);
			} else if (last > LOW_SURROGATE_MAX) {
				// Ends after the low surrogates. E.g. (0xD855, 0x10FFFF).
				loneHighSurrogates.push(first, HIGH_SURROGATE_MAX + 1);
				loneLowSurrogates.push(LOW_SURROGATE_MIN, LOW_SURROGATE_MAX + 1);
				pushAboveSurrogates(LOW_SURROGATE_MAX + 1, last);
			}
		} else if (first >= LOW_SURROGATE_MIN && first <= LOW_SURROGATE_MAX) {
			if (last >= LOW_SURROGATE_MIN && last <= LOW_SURROGATE_MAX) {
				// Starts and ends within the low surrogates.
				// E.g. (0xDCFF, 0xDDFF).
				loneLowSurrogates.push(first, last + 1);
			} else if (last > LOW_SURROGATE_MAX) {
				// Ends after the low surrogates. E.g. (0xDCFF, 0x10FFFF).
				loneLowSurrogates.push(first, LOW_SURROGATE_MAX + 1);
				pushAboveSurrogates(LOW_SURROGATE_MAX + 1, last);
			}
		} else if (first > LOW_SURROGATE_MAX && first <= 0xFFFF) {
			// Starts after the low surrogates but within the BMP.
			// E.g. (0xFFAA, 0x10FFFF).
			pushAboveSurrogates(first, last);
		} else {
			// Starts and ends in the astral range.
			astral.push(first, last + 1);
		}
	}

	return {
		'loneHighSurrogates': loneHighSurrogates,
		'loneLowSurrogates': loneLowSurrogates,
		'bmp': bmp,
		'astral': astral
	};
};
// Merges consecutive entries of `surrogateMappings` that share the same high
// surrogates, then hands the merged list to `optimizeByLowSurrogates` and
// returns that function's result.
//
// Each entry is a two-item array `[highSurrogates, lowSurrogates]`. Two
// neighbouring entries count as "the same" when the first two values of their
// high surrogate data match. For such a run, the first `(start, end)` pair of
// each later entry's low surrogates is added to the low surrogates of the
// first entry of the run, and a single merged entry is emitted.
//
// Note: `tmpLow` refers to the same array as the first entry's low surrogate
// data, and `dataAdd` / `dataAddRange` modify the array they are given, so the
// entries of the input list are changed as a side effect.
var optimizeSurrogateMappings = function(surrogateMappings) { var result = []; var tmpLow = []; var addLow = false; var mapping; var nextMapping; var highSurrogates; var lowSurrogates; var nextHighSurrogates; var nextLowSurrogates; var index = -1; var length = surrogateMappings.length; while (++index < length) { mapping = surrogateMappings[index]; nextMapping = surrogateMappings[index + 1]; if (!nextMapping) { result.push(mapping); continue; } highSurrogates = mapping[0]; lowSurrogates = mapping[1]; nextHighSurrogates = nextMapping[0]; nextLowSurrogates = nextMapping[1];
// Check for identical high surrogate ranges.
tmpLow = lowSurrogates; while ( nextHighSurrogates && highSurrogates[0] == nextHighSurrogates[0] && highSurrogates[1] == nextHighSurrogates[1] ) { // Merge with the next item.
// After folding the next entry's low surrogates into `tmpLow`, step the outer
// index forward so the absorbed entry is not visited again, and re-read the
// current and next entries for the following comparison. Once the run ends,
// one entry is emitted: the accumulated `tmpLow` when anything was merged
// (`addLow`), otherwise the entry's own low surrogates.
if (dataIsSingleton(nextLowSurrogates)) { tmpLow = dataAdd(tmpLow, nextLowSurrogates[0]); } else { tmpLow = dataAddRange( tmpLow, nextLowSurrogates[0], nextLowSurrogates[1] - 1 ); } ++index; mapping = surrogateMappings[index]; highSurrogates = mapping[0]; lowSurrogates = mapping[1]; nextMapping = surrogateMappings[index + 1]; nextHighSurrogates = nextMapping && nextMapping[0]; nextLowSurrogates = nextMapping && nextMapping[1]; addLow = true; } result.push([ highSurrogates, addLow ? tmpLow : lowSurrogates ]); addLow = false; } return optimizeByLowSurrogates(result); };
// Merges entries of `surrogateMappings` that have identical low surrogates by
// combining their high surrogates into the earlier entry and removing the
// later one. The list and its entries are modified in place; the list is
// returned.
//
// Each entry is a two-item array `[highSurrogates, lowSurrogates]`. Two
// entries are only merged when BOTH consist of exactly one low surrogate
// `(start, end)` pair and those pairs are equal. Comparing just the first
// pair is not enough: if either side carried additional low surrogate ranges,
// merging would either drop those ranges or make them apply to high
// surrogates they do not belong to, so the output would match the wrong set
// of code points.
var optimizeByLowSurrogates = function(surrogateMappings) {
	if (surrogateMappings.length == 1) {
		return surrogateMappings;
	}
	var index = -1;
	var innerIndex = -1;
	while (++index < surrogateMappings.length) {
		var mapping = surrogateMappings[index];
		var lowSurrogates = mapping[1];
		// An entry with several low surrogate ranges cannot absorb others.
		if (lowSurrogates.length !== 2) {
			continue;
		}
		var lowSurrogateStart = lowSurrogates[0];
		var lowSurrogateEnd = lowSurrogates[1];
		innerIndex = index; // Note: the loop starts at the next index.
		while (++innerIndex < surrogateMappings.length) {
			var otherMapping = surrogateMappings[innerIndex];
			var otherLowSurrogates = otherMapping[1];
			if (
				otherLowSurrogates.length !== 2 ||
				lowSurrogateStart != otherLowSurrogates[0] ||
				lowSurrogateEnd != otherLowSurrogates[1]
			) {
				continue;
			}
			// Add the high surrogates of the other item to this one.
			if (dataIsSingleton(otherMapping[0])) {
				mapping[0] = dataAdd(mapping[0], otherMapping[0][0]);
			} else {
				mapping[0] = dataAddRange(
					mapping[0],
					otherMapping[0][0],
					otherMapping[0][1] - 1
				);
			}
			// Remove the other, now redundant, item, and re-check the entry
			// that moved into its slot.
			surrogateMappings.splice(innerIndex, 1);
			--innerIndex;
		}
	}
	return surrogateMappings;
};
// Translates astral range data into a list of surrogate mappings and returns
// that list after passing it through `optimizeSurrogateMappings`.
//
// The list has the following format:
//
//     [
//       [ highSurrogates1, lowSurrogates1 ],
//       [ highSurrogates2, lowSurrogates2 ]
//     ]
//
// where each item is itself a `(start, end)` pair with an exclusive `end`.
var surrogateSet = function(data) {
	// Exit early if `data` is an empty set.
	if (!data.length) {
		return [];
	}

	var mappings = [];
	var total = data.length;
	for (var position = 0; position < total; position += 2) {
		var first = data[position];
		var last = data[position + 1] - 1; // Inclusive end of the pair.

		var firstHigh = highSurrogate(first);
		var firstLow = lowSurrogate(first);
		var lastHigh = highSurrogate(last);
		var lastLow = lowSurrogate(last);

		var startsAtLowestLow = firstLow == LOW_SURROGATE_MIN;
		var endsAtHighestLow = lastLow == LOW_SURROGATE_MAX;

		// The whole range fits in one mapping: either a single high surrogate
		// is involved, or the range covers a full block of low surrogates for
		// every high surrogate it spans.
		if (firstHigh == lastHigh || (startsAtLowestLow && endsAtHighestLow)) {
			mappings.push([
				[firstHigh, lastHigh + 1],
				[firstLow, lastLow + 1]
			]);
			continue;
		}

		// Step 1: `(firstHigh, firstLow)` to `(firstHigh, LOW_SURROGATE_MAX)`.
		mappings.push([
			[firstHigh, firstHigh + 1],
			[firstLow, LOW_SURROGATE_MAX + 1]
		]);

		// Step 2: `(firstHigh + 1, LOW_SURROGATE_MIN)` to
		// `(lastHigh - 1, LOW_SURROGATE_MAX)`.
		if (firstHigh + 1 < lastHigh) {
			if (endsAtHighestLow) {
				// The final high surrogate is covered completely as well, so
				// steps 2 and 3 collapse into one mapping.
				mappings.push([
					[firstHigh + 1, lastHigh + 1],
					[LOW_SURROGATE_MIN, lastLow + 1]
				]);
				continue;
			}
			mappings.push([
				[firstHigh + 1, lastHigh],
				[LOW_SURROGATE_MIN, LOW_SURROGATE_MAX + 1]
			]);
		}

		// Step 3: `(lastHigh, LOW_SURROGATE_MIN)` to `(lastHigh, lastLow)`.
		mappings.push([
			[lastHigh, lastHigh + 1],
			[LOW_SURROGATE_MIN, lastLow + 1]
		]);
	}

	return optimizeSurrogateMappings(mappings);
};
// Turns a list of `[highSurrogates, lowSurrogates]` mappings into regular
// expression source: each mapping becomes a high-surrogate class directly
// followed by a low-surrogate class, and the mappings are joined with `|`.
var createSurrogateCharacterClasses = function(surrogateMappings) {
	var alternatives = [];
	var total = surrogateMappings.length;
	for (var position = 0; position < total; position++) {
		var pair = surrogateMappings[position];
		var highClass = createBMPCharacterClasses(pair[0]);
		var lowClass = createBMPCharacterClasses(pair[1]);
		alternatives.push(highClass + lowClass);
	}
	return alternatives.join('|');
};
// Builds the regular-expression source for `data`.
//
// - With `hasUnicodeFlag`, the result is produced by
//   `createUnicodeCharacterClasses` alone.
// - Otherwise the data is split into BMP code points, lone surrogates and
//   astral code points, and the individual patterns are joined with `|`.
// - With `bmpOnly`, lone surrogates are folded into the BMP class instead of
//   being emitted as separate, guarded alternatives.
//
// An empty string is returned when nothing needs to be matched.
var createCharacterClassesFromData = function(data, bmpOnly, hasUnicodeFlag) {
	if (hasUnicodeFlag) {
		return createUnicodeCharacterClasses(data);
	}

	var split = splitAtBMP(data);
	var highs = split.loneHighSurrogates;
	var lows = split.loneLowSurrogates;
	var bmp = split.bmp;
	var emitLoneHighs = !dataIsEmpty(highs);
	var emitLoneLows = !dataIsEmpty(lows);
	var surrogateMappings = surrogateSet(split.astral);

	if (bmpOnly) {
		bmp = dataAddData(dataAddData(bmp, highs), lows);
		emitLoneHighs = false;
		emitLoneLows = false;
	}

	var alternatives = [];
	if (!dataIsEmpty(bmp)) {
		// BMP code points that are not high surrogates needed for astral code
		// points in the set.
		alternatives.push(createBMPCharacterClasses(bmp));
	}
	if (surrogateMappings.length) {
		// Astral code points, expressed through their surrogate pairs.
		alternatives.push(createSurrogateCharacterClasses(surrogateMappings));
	}
	// https://gist.github.com/mathiasbynens/bbe7f870208abcfec860
	if (emitLoneHighs) {
		// The lookahead makes sure the high surrogates aren't part of a
		// surrogate pair.
		alternatives.push(
			createBMPCharacterClasses(highs) + '(?![\\uDC00-\\uDFFF])'
		);
	}
	if (emitLoneLows) {
		// It is not possible to accurately assert the low surrogates aren't
		// part of a surrogate pair without lookbehind, so the preceding symbol
		// (or the start of the input) is matched as part of the pattern.
		alternatives.push(
			'(?:[^\\uD800-\\uDBFF]|^)' + createBMPCharacterClasses(lows)
		);
	}
	return alternatives.join('|');
};
/*--------------------------------------------------------------------------*/
// `regenerate` can be used as a constructor (and new methods can be added to
// its prototype) but also as a regular function, the latter of which is the
// documented and most common usage. For that reason, it's not capitalized.
//
// When more than one argument is passed, the arguments are collected into an
// array and added together. Returns the (new) set.
var regenerate = function(value) {
	if (arguments.length > 1) {
		value = slice.call(arguments);
	}
	if (this instanceof regenerate) {
		this.data = [];
		// `0` is a valid code point (U+0000), so it must not be skipped by the
		// truthiness check; `new regenerate(0)` has to behave like
		// `regenerate(0)`.
		return (value || value === 0) ? this.add(value) : this;
	}
	return (new regenerate).add(value);
};
regenerate.version = '1.4.2';
var proto = regenerate.prototype;
extend(proto, {
	// Adds a code point, a symbol, an array of those, or the contents of
	// another set. Several arguments are treated like an array. Returns the
	// set itself so that calls can be chained.
	'add': function(value) {
		var set = this;
		if (value == null) {
			return set;
		}
		if (value instanceof regenerate) {
			// Allow passing other Regenerate instances.
			set.data = dataAddData(set.data, value.data);
			return set;
		}
		if (arguments.length > 1) {
			value = slice.call(arguments);
		}
		if (isArray(value)) {
			forEach(value, function(item) {
				set.add(item);
			});
			return set;
		}
		var codePoint = isNumber(value) ? value : symbolToCodePoint(value);
		set.data = dataAdd(set.data, codePoint);
		return set;
	},
	// Removes a code point, a symbol, an array of those, or the contents of
	// another set. Several arguments are treated like an array. Returns the
	// set itself.
	'remove': function(value) {
		var set = this;
		if (value == null) {
			return set;
		}
		if (value instanceof regenerate) {
			// Allow passing other Regenerate instances.
			set.data = dataRemoveData(set.data, value.data);
			return set;
		}
		if (arguments.length > 1) {
			value = slice.call(arguments);
		}
		if (isArray(value)) {
			forEach(value, function(item) {
				set.remove(item);
			});
			return set;
		}
		var codePoint = isNumber(value) ? value : symbolToCodePoint(value);
		set.data = dataRemove(set.data, codePoint);
		return set;
	},
	// Adds the inclusive range `start`-`end`; both bounds may be given as
	// code points or as symbols. Returns the set itself.
	'addRange': function(start, end) {
		var startCodePoint = isNumber(start) ? start : symbolToCodePoint(start);
		var endCodePoint = isNumber(end) ? end : symbolToCodePoint(end);
		this.data = dataAddRange(this.data, startCodePoint, endCodePoint);
		return this;
	},
	// Removes the inclusive range `start`-`end`; both bounds may be given as
	// code points or as symbols. Returns the set itself.
	'removeRange': function(start, end) {
		var startCodePoint = isNumber(start) ? start : symbolToCodePoint(start);
		var endCodePoint = isNumber(end) ? end : symbolToCodePoint(end);
		this.data = dataRemoveRange(this.data, startCodePoint, endCodePoint);
		return this;
	},
	// Reduces the set to the values that also occur in `argument`, which is
	// either an array of code points or another set. Returns the set itself.
	'intersection': function(argument) {
		// Allow passing other Regenerate instances.
		// TODO: Optimize this by writing and using `dataIntersectionData()`.
		var codePoints = argument;
		if (argument instanceof regenerate) {
			codePoints = dataToArray(argument.data);
		}
		this.data = dataIntersection(this.data, codePoints);
		return this;
	},
	// Reports whether the given code point or symbol is part of the set.
	'contains': function(codePoint) {
		var value = isNumber(codePoint) ? codePoint : symbolToCodePoint(codePoint);
		return dataContains(this.data, value);
	},
	// Returns a new set holding a copy of this set's data.
	'clone': function() {
		var copy = new regenerate;
		copy.data = this.data.slice(0);
		return copy;
	},
	// Returns regular-expression source matching the set. `options` may carry
	// the flags `bmpOnly` and `hasUnicodeFlag`.
	'toString': function(options) {
		var bmpOnly = options ? options.bmpOnly : false;
		var hasUnicodeFlag = options ? options.hasUnicodeFlag : false;
		var pattern = createCharacterClassesFromData(
			this.data,
			bmpOnly,
			hasUnicodeFlag
		);
		if (!pattern) {
			// For an empty set, return something that can be inserted
			// `/here/` to form a valid regular expression. Avoid `(?:)` since
			// that matches the empty string.
			return '[]';
		}
		// Use `\0` instead of `\x00` where possible.
		return pattern.replace(regexNull, '\\0$1');
	},
	// Returns a `RegExp` matching the set. When `flags` contains `u`, the
	// pattern is generated for the Unicode flag.
	'toRegExp': function(flags) {
		var wantsUnicode = flags && flags.indexOf('u') != -1;
		var pattern = this.toString(
			wantsUnicode ? { 'hasUnicodeFlag': true } : null
		);
		return RegExp(pattern, flags || '');
	},
	// Returns an array with every code point in the set.
	// Note: `valueOf` is aliased as `toArray`.
	'valueOf': function() {
		return dataToArray(this.data);
	}
});
proto.toArray = proto.valueOf;
// Expose `regenerate` through whichever module system was detected: an AMD
// loader first, then the `exports` / `module` objects, and otherwise as a
// property of `root`.
// Some AMD build optimizers, like r.js, check for specific condition patterns
// like the following, so the shape of this condition should be left as it is:
if ( typeof define == 'function' && typeof define.amd == 'object' && define.amd ) { define(function() { return regenerate; }); } else if (freeExports && !freeExports.nodeType) { if (freeModule) { // in Node.js, io.js, or RingoJS v0.8.0+
freeModule.exports = regenerate; } else { // in Narwhal or RingoJS v0.7.0-
freeExports.regenerate = regenerate; } } else { // in Rhino or a web browser
root.regenerate = regenerate; }
}(this));
|