2024-11-05 10:14:41 +08:00

47 lines
1.4 KiB
TypeScript

/**
 * Used to compose unicode character classes.
 *
 * Each value is the *inside* of a regex character class (no surrounding
 * brackets) written as escaped source text, so the fragments can be
 * concatenated and wrapped in `[...]` or `[^...]` later.
 */
// All UTF-16 surrogate code units (high and low).
const rsAstralRange = '\\ud800-\\udfff'
// Combining Diacritical Marks.
const rsComboMarksRange = '\\u0300-\\u036f'
// Combining Half Marks.
const reComboHalfMarksRange = '\\ufe20-\\ufe2f'
// Combining Diacritical Marks for Symbols.
const rsComboSymbolsRange = '\\u20d0-\\u20ff'
// Combining Diacritical Marks Extended.
const rsComboMarksExtendedRange = '\\u1ab0-\\u1aff'
// Combining Diacritical Marks Supplement.
const rsComboMarksSupplementRange = '\\u1dc0-\\u1dff'
// Every combining mark that should stay attached to the preceding symbol.
const rsComboRange =
  rsComboMarksRange +
  reComboHalfMarksRange +
  rsComboSymbolsRange +
  rsComboMarksExtendedRange +
  rsComboMarksSupplementRange
// Variation selectors 15 (text presentation) and 16 (emoji presentation).
const rsVarRange = '\\ufe0e\\ufe0f'
/**
 * Used to compose unicode capture groups.
 *
 * Each constant is a fragment of regex source text, not a compiled RegExp.
 */
// Any single surrogate code unit.
const rsAstral = '[' + rsAstralRange + ']'
// Any single combining mark.
const rsCombo = '[' + rsComboRange + ']'
// Emoji skin-tone (Fitzpatrick) modifiers U+1F3FB..U+1F3FF as surrogate pairs.
const rsFitz = '\\ud83c[\\udffb-\\udfff]'
// Something that modifies the preceding symbol: a combining mark or a skin tone.
const rsModifier = '(?:' + rsCombo + '|' + rsFitz + ')'
// Any single code unit that is not a surrogate.
const rsNonAstral = '[^' + rsAstralRange + ']'
// Two regional indicator symbols (U+1F1E6..U+1F1FF) in a row.
const rsRegional = '(?:\\ud83c[\\udde6-\\uddff]){2}'
// A well-formed high + low surrogate pair.
const rsSurrPair = '[\\ud800-\\udbff][\\udc00-\\udfff]'
// Zero-width joiner.
const rsZWJ = '\\u200d'
/**
 * Used to compose unicode regexes.
 *
 * Like the fragments they are built from, these are regex source strings.
 */
// Optional trailing modifier (combining mark or skin tone).
const reOptMod = rsModifier + '?'
// Optional trailing variation selector.
const rsOptVar = '[' + rsVarRange + ']?'
// What may follow a zero-width joiner inside a joined sequence.
const rsJoinedAlternatives = [rsNonAstral, rsRegional, rsSurrPair].join('|')
// Zero or more "ZWJ + symbol" continuations, each with its own optional
// variation selector and modifier.
const rsOptJoin =
  '(?:' + rsZWJ + '(?:' + rsJoinedAlternatives + ')' + rsOptVar + reOptMod + ')*'
// Everything that may trail a base symbol and still belong to it.
const rsSeq = `${rsOptVar}${reOptMod}${rsOptJoin}`
// Base symbol alternatives, tried in this order.
const rsSymbolAlternatives = [
  rsNonAstral + rsCombo + '?', // non-surrogate unit plus an optional combining mark
  rsCombo, // a combining mark on its own
  rsRegional, // pair of regional indicators
  rsSurrPair, // well-formed surrogate pair
  rsAstral, // lone surrogate
].join('|')
const rsSymbol = '(?:' + rsSymbolAlternatives + ')'
/**
 * Used to match [string symbols](https://mathiasbynens.be/notes/javascript-unicode).
 *
 * Two alternatives, tried in order:
 * 1. a skin-tone modifier that is immediately followed by another skin-tone
 *    modifier (matched alone; the lookahead does not consume the second one);
 * 2. a base symbol together with its trailing sequence.
 */
const reUnicode = new RegExp(
  [rsFitz + '(?=' + rsFitz + ')', rsSymbol + rsSeq].join('|'),
  'g'
)
/**
 * Converts a Unicode `string` to an array of its symbols.
 *
 * Every element is one match of `reUnicode`, so the elements concatenate to
 * the parts of the input that the pattern matched, in order.
 *
 * @private
 * @param string The string to convert.
 * @returns The array of matched symbols; an empty array when nothing matches
 *   (for example, for the empty string).
 */
export function unicodeToArray(string: string): string[] {
  // With a global regex, `match` returns every match, or `null` (not an empty
  // array) when there is none. It also resets `lastIndex` before matching, so
  // sharing the module-level `reUnicode` across calls is safe.
  return string.match(reUnicode) ?? []
}