diff options
| author | Jules Laplace <julescarbon@gmail.com> | 2021-10-17 02:52:05 +0200 |
|---|---|---|
| committer | Jules Laplace <julescarbon@gmail.com> | 2021-10-17 02:52:05 +0200 |
| commit | 06ecdf2af182034496e2123852deee4a58de1043 (patch) | |
| tree | c8d4eb9664dd368bee5a4bf73dd1e02015ecaf39 /src/app/utils/unicode_utils.js | |
making a shoebox
Diffstat (limited to 'src/app/utils/unicode_utils.js')
| -rw-r--r-- | src/app/utils/unicode_utils.js | 63 |
1 files changed, 63 insertions, 0 deletions
// Reconstructed from the diff of src/app/utils/unicode_utils.js (new file).
/**
 * Functions for dealing with Arabic text.
 * Based on https://github.com/ahmads/arabicString/
 * For Farsi, it is sufficient to use the `persianRex` NPM module
 * @module utils/unicode_utils
 */

import makeEmojiRegexp from "emoji-regex/RGI_Emoji.js";

const rtlPunctuation = "،|؟|«|»|؛|٬";
const ltrPunctuation = "\\.|:|!|-|\\[|\\]|\\(|\\)|\\\\|/";

// Alternation of every punctuation mark that is ignored when measuring text.
const punctuationRegexp = new RegExp(
  "(" + rtlPunctuation + "|" + ltrPunctuation + ")",
  "gu"
);
// Whitespace plus ASCII (0-9), U+0660-U+0669 and U+06F0-U+06F9 digits.
const spacesAndDigitsRegexp = /[\s0-9\u0660-\u0669\u06F0-\u06F9]/gu;
// U+0621-U+0652: the letter range and the diacritic range below, combined.
const arabicRegexp = /[\u0621-\u0652]/gu;
// Deliberately NOT global: `.test()` on a `g` regex keeps `lastIndex` between
// calls, which makes repeated calls on the same text alternate true/false.
const arabicLettersRegexp = /[\u0621-\u064A]/u;
const arabicDiacriticsRegexp = /[\u064B-\u0652]/gu;
const emojiRegexp = makeEmojiRegexp();

const DEFAULT_ARABIC_THRESHOLD = 0.79;

/**
 * The fraction of the `String` made up of Arabic characters
 * (U+0621-U+0652), measured after punctuation, emoji, digits and
 * whitespace have been removed.
 * @param {String} text Text to process
 * @returns {Float} fraction from `0.0` - `1.0`; `0.0` for empty text or
 *   text with nothing left once the ignored characters are stripped
 */

export const howArabic = (text) => {
  if (!text) return 0.0;
  // strip punctuation, emoji, digits and spaces
  const stripped = text
    .replace(punctuationRegexp, "")
    .replace(emojiRegexp, "")
    .replace(spacesAndDigitsRegexp, "");
  // Count code points (not UTF-16 units) so the denominator is measured the
  // same way as the `u`-flag match below.
  const total = Array.from(stripped).length;
  // Nothing measurable left: avoid returning NaN from 0 / 0.
  if (total === 0) return 0.0;
  const matches = stripped.match(arabicRegexp);
  return matches === null ? 0.0 : matches.length / total;
};

/**
 * Is the `String` Arabic, based on
 * a given `threshold` between `0` and `1`. Defaults to `0.79`.
 * @param {string} text Text to process
 * @param {Float} [threshold=0.79] Minimum `howArabic` score; any finite
 *   number (including `0`) is honoured, anything else uses the default
 * @returns {Boolean}
 */

export const isArabic = (text, threshold) => {
  // `threshold || 0.79` would silently turn an explicit 0 into 0.79.
  const limit =
    typeof threshold === "number" && Number.isFinite(threshold)
      ? threshold
      : DEFAULT_ARABIC_THRESHOLD;
  return howArabic(text) >= limit;
};

/**
 * Does the `String` have _any_ Arabic letters (U+0621-U+064A).
 * @param {String} text Text to process
 * @returns {Boolean}
 */

export const hasArabic = (text) => arabicLettersRegexp.test(text);

/**
 * Remove the Arabic tashkil -diacritics- (U+064B-U+0652) from the `String`.
 * @param {String} text Text to process
 * @returns {String}
 */

export const removeTashkel = (text) => text.replace(arabicDiacriticsRegexp, "");
