aboutsummaryrefslogtreecommitdiff
path: root/node_modules/lodash/_unicodeWords.js
diff options
context:
space:
mode:
Diffstat (limited to 'node_modules/lodash/_unicodeWords.js')
-rw-r--r--node_modules/lodash/_unicodeWords.js69
1 files changed, 69 insertions, 0 deletions
diff --git a/node_modules/lodash/_unicodeWords.js b/node_modules/lodash/_unicodeWords.js
new file mode 100644
index 0000000..e72e6e0
--- /dev/null
+++ b/node_modules/lodash/_unicodeWords.js
@@ -0,0 +1,69 @@
+/** Used to compose unicode character classes. */
+var rsAstralRange = '\\ud800-\\udfff',
+ rsComboMarksRange = '\\u0300-\\u036f',
+ reComboHalfMarksRange = '\\ufe20-\\ufe2f',
+ rsComboSymbolsRange = '\\u20d0-\\u20ff',
+ rsComboRange = rsComboMarksRange + reComboHalfMarksRange + rsComboSymbolsRange,
+ rsDingbatRange = '\\u2700-\\u27bf',
+ rsLowerRange = 'a-z\\xdf-\\xf6\\xf8-\\xff',
+ rsMathOpRange = '\\xac\\xb1\\xd7\\xf7',
+ rsNonCharRange = '\\x00-\\x2f\\x3a-\\x40\\x5b-\\x60\\x7b-\\xbf',
+ rsPunctuationRange = '\\u2000-\\u206f',
+ rsSpaceRange = ' \\t\\x0b\\f\\xa0\\ufeff\\n\\r\\u2028\\u2029\\u1680\\u180e\\u2000\\u2001\\u2002\\u2003\\u2004\\u2005\\u2006\\u2007\\u2008\\u2009\\u200a\\u202f\\u205f\\u3000',
+ rsUpperRange = 'A-Z\\xc0-\\xd6\\xd8-\\xde',
+ rsVarRange = '\\ufe0e\\ufe0f',
+ rsBreakRange = rsMathOpRange + rsNonCharRange + rsPunctuationRange + rsSpaceRange;
+
+/** Used to compose unicode capture groups. */
+var rsApos = "['\u2019]",
+ rsBreak = '[' + rsBreakRange + ']',
+ rsCombo = '[' + rsComboRange + ']',
+ rsDigits = '\\d+',
+ rsDingbat = '[' + rsDingbatRange + ']',
+ rsLower = '[' + rsLowerRange + ']',
+ rsMisc = '[^' + rsAstralRange + rsBreakRange + rsDigits + rsDingbatRange + rsLowerRange + rsUpperRange + ']',
+ rsFitz = '\\ud83c[\\udffb-\\udfff]',
+ rsModifier = '(?:' + rsCombo + '|' + rsFitz + ')',
+ rsNonAstral = '[^' + rsAstralRange + ']',
+ rsRegional = '(?:\\ud83c[\\udde6-\\uddff]){2}',
+ rsSurrPair = '[\\ud800-\\udbff][\\udc00-\\udfff]',
+ rsUpper = '[' + rsUpperRange + ']',
+ rsZWJ = '\\u200d';
+
+/** Used to compose unicode regexes. */
+var rsMiscLower = '(?:' + rsLower + '|' + rsMisc + ')',
+ rsMiscUpper = '(?:' + rsUpper + '|' + rsMisc + ')',
+ rsOptContrLower = '(?:' + rsApos + '(?:d|ll|m|re|s|t|ve))?',
+ rsOptContrUpper = '(?:' + rsApos + '(?:D|LL|M|RE|S|T|VE))?',
+ reOptMod = rsModifier + '?',
+ rsOptVar = '[' + rsVarRange + ']?',
+ rsOptJoin = '(?:' + rsZWJ + '(?:' + [rsNonAstral, rsRegional, rsSurrPair].join('|') + ')' + rsOptVar + reOptMod + ')*',
+ rsOrdLower = '\\d*(?:1st|2nd|3rd|(?![123])\\dth)(?=\\b|[A-Z_])',
+ rsOrdUpper = '\\d*(?:1ST|2ND|3RD|(?![123])\\dTH)(?=\\b|[a-z_])',
+ rsSeq = rsOptVar + reOptMod + rsOptJoin,
+ rsEmoji = '(?:' + [rsDingbat, rsRegional, rsSurrPair].join('|') + ')' + rsSeq;
+
+/** Used to match complex or compound words. */
+var reUnicodeWord = RegExp([
+ rsUpper + '?' + rsLower + '+' + rsOptContrLower + '(?=' + [rsBreak, rsUpper, '$'].join('|') + ')',
+ rsMiscUpper + '+' + rsOptContrUpper + '(?=' + [rsBreak, rsUpper + rsMiscLower, '$'].join('|') + ')',
+ rsUpper + '?' + rsMiscLower + '+' + rsOptContrLower,
+ rsUpper + '+' + rsOptContrUpper,
+ rsOrdUpper,
+ rsOrdLower,
+ rsDigits,
+ rsEmoji
+].join('|'), 'g');
+
+/**
+ * Splits a Unicode `string` into an array of its words.
+ *
+ * @private
+ * @param {string} The string to inspect.
+ * @returns {Array} Returns the words of `string`.
+ */
+function unicodeWords(string) {
+ return string.match(reUnicodeWord) || [];
+}
+
+module.exports = unicodeWords;