blob: c6a22536bdcb1971b8a07bf2380b1e05b06ecd5e (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
|
/**
* Functions for dealing with Arabic text.
* Based on https://github.com/ahmads/arabicString/
* For Farsi, it is sufficient to use the `persianRex` NPM module
* @module utils/ar_utils
*/
import makeEmojiRegexp from "emoji-regex/RGI_Emoji.js";
// Punctuation marks, written as "|"-separated regular-expression alternatives.
const rtlPunctuation = "،|؟|«|»|؛|٬";
const ltrPunctuation = "\\.|:|!|-|\\[|\\]|\\(|\\)|\\\\|/";
// One capture group that matches any single mark from either list above.
const punctuationRegexp = new RegExp(
  `(${rtlPunctuation}|${ltrPunctuation})`,
  "gu"
);
// Character classes over the Arabic Unicode block:
//   U+0621-U+0652  letters and diacritics together
//   U+0621-U+064A  letters only
//   U+064B-U+0652  diacritics (tashkil) only
// All of these carry the `g` flag, so `.test()` / `.exec()` on them is
// stateful through `lastIndex`.
const arabicRegexp = new RegExp("[\u0621-\u0652]", "gu");
const arabicLettersRegexp = new RegExp("[\u0621-\u064A]", "gu");
const arabicDiacriticsRegexp = new RegExp("[\u064B-\u0652]", "gu");
// Emoji matcher built by the imported `emoji-regex` factory.
const emojiRegexp = makeEmojiRegexp();
/**
 * The fraction of Arabic characters in the `String`, measured after
 * punctuation and emoji have been removed.
 * @param {String} text Text to process
 * @returns {number} ratio from `0.0` to `1.0`; `0.0` when `text` is empty
 * or nothing is left after stripping
 */
export const howArabic = (text) => {
  if (!text) return 0.0;
  // Strip punctuation and emoji so they do not dilute the ratio.
  // NOTE: `punctuationRegexp` does not cover digits or whitespace, so those
  // characters still count towards the total length.
  const stripped = text.replace(punctuationRegexp, "").replace(emojiRegexp, "");
  // Text made only of punctuation/emoji would otherwise yield 0 / 0 (NaN).
  if (stripped.length === 0) return 0.0;
  const matches = stripped.match(arabicRegexp);
  return matches ? matches.length / stripped.length : 0.0;
};
/**
 * Is the `String` Arabic, based on
 * a given `threshold` between `0` and `1`. Defaults to `0.79`.
 * @param {string} text Text to process
 * @param {number} [threshold=0.79] Minimum Arabic ratio required. An explicit
 * `0` is honoured; only `null` / `undefined` fall back to the default.
 * @returns {Boolean}
 */
export const isArabic = (text, threshold) => {
  // A plain `threshold || 0.79` would silently turn a requested 0 into 0.79.
  const limit = threshold == null ? 0.79 : threshold;
  return howArabic(text) >= limit;
};
/**
 * Does the `String` have _any_ Arabic letters (U+0621-U+064A).
 *
 * Uses a non-global pattern on purpose: `.test()` on a `g`-flagged regex
 * resumes from its `lastIndex`, so repeated calls against a shared global
 * regex can return `false` for text that does contain Arabic letters.
 * @param {String} text Text to process
 * @returns {Boolean}
 */
export const hasArabic = (text) => /[\u0621-\u064A]/u.test(text);
/**
 * Strip the Arabic tashkil (diacritic marks) out of the `String`.
 * @param {String} text Text to process
 * @returns {String} `text` with every diacritic match deleted
 */
export const removeTashkel = (text) => {
  const withoutDiacritics = text.replace(arabicDiacriticsRegexp, "");
  return withoutDiacritics;
};
|