/**
 * Functions for dealing with Arabic text.
 * Based on https://github.com/ahmads/arabicString/
 * For Farsi, it is sufficient to use the `persianRex` NPM module
 * @module utils/ar_utils
 */
import makeEmojiRegexp from "emoji-regex/RGI_Emoji.js";

// Alternations of single punctuation characters, joined into one pattern below.
const rtlPunctuation = "،|؟|«|»|؛|٬";
const ltrPunctuation = "\\.|:|!|-|\\[|\\]|\\(|\\)|\\\\|/";
const punctuationRegexp = new RegExp(
  "(" + rtlPunctuation + "|" + ltrPunctuation + ")",
  "gu"
);

// U+0621–U+0652: the letter range and the diacritic range below, combined.
const arabicRegexp = new RegExp("[\u0621-\u0652]", "gu");
// Deliberately NOT global: this regex is only used with `.test()`, and a
// shared `g` regex keeps `lastIndex` between calls, which makes repeated
// `.test()` calls alternate between `true` and `false`.
const arabicLettersRegexp = new RegExp("[\u0621-\u064A]", "u");
// U+064B–U+0652: the tashkil (diacritic) marks removed by `removeTashkel`.
const arabicDiacriticsRegexp = new RegExp("[\u064B-\u0652]", "gu");
const emojiRegexp = makeEmojiRegexp();

/**
 * The fraction of Arabic characters (letters and diacritics) in the string.
 *
 * Punctuation and emoji are ignored. Everything else that is not in the
 * Arabic range (whitespace, digits, Latin letters, ...) still counts toward
 * the total and therefore lowers the ratio.
 *
 * @param {string} text Text to process
 * @returns {number} ratio from `0.0` to `1.0`; `0.0` when `text` is empty or
 *   contains nothing but punctuation and emoji
 */
export const howArabic = (text) => {
  if (!text) return 0.0;

  const stripped = text.replace(punctuationRegexp, "").replace(emojiRegexp, "");

  // Count code points rather than UTF-16 units so the denominator uses the
  // same unit as the `u`-flag matches counted below.
  const total = [...stripped].length;
  // Nothing left to measure: avoid 0 / 0 producing NaN.
  if (total === 0) return 0.0;

  const matches = stripped.match(arabicRegexp);
  return matches ? matches.length / total : 0.0;
};

/**
 * Is the string Arabic, based on a given `threshold` between `0` and `1`.
 *
 * @param {string} text Text to process
 * @param {number} [threshold=0.79] Minimum ratio returned by `howArabic`
 *   for the text to be considered Arabic
 * @returns {boolean}
 */
export const isArabic = (text, threshold) => {
  // An explicit `0` is a valid threshold and is honored; any other falsy
  // value (undefined, null, NaN, ...) falls back to the default.
  const limit = threshold === 0 ? 0 : threshold || 0.79;
  return howArabic(text) >= limit;
};

/**
 * Does the string have _any_ Arabic letters.
 *
 * @param {string} text Text to process
 * @returns {boolean}
 */
export const hasArabic = (text) => arabicLettersRegexp.test(text);

/**
 * Remove the Arabic tashkil (diacritics) from the string.
 *
 * @param {string} text Text to process
 * @returns {string} `text` without any character in U+064B–U+0652
 */
export const removeTashkel = (text) => text.replace(arabicDiacriticsRegexp, "");