summaryrefslogtreecommitdiff
path: root/src/app/utils/unicode_utils.js
blob: c6a22536bdcb1971b8a07bf2380b1e05b06ecd5e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
/**
 * Functions for dealing with Arabic text.
 * Based on https://github.com/ahmads/arabicString/
 * For Farsi, it is sufficient to use the `persianRex` NPM module
 * @module utils/ar_utils
 */

import makeEmojiRegexp from "emoji-regex/RGI_Emoji.js";

// Alternation fragments (regexp source text) for punctuation to be ignored.
// Right-to-left punctuation marks, separated by `|`.
const rtlPunctuation = "،|؟|«|»|؛|٬";
// Left-to-right punctuation marks; regexp metacharacters are escaped.
const ltrPunctuation = "\\.|:|!|-|\\[|\\]|\\(|\\)|\\\\|/";

// Matches any single punctuation mark from either fragment above.
const punctuationRegexp = new RegExp(
  `(${rtlPunctuation}|${ltrPunctuation})`,
  "gu"
);

// Code point ranges inside the Arabic block:
//   U+0621 - U+064A  letters
//   U+064B - U+0652  diacritics (tashkil)
// All three are global (`g`), so `lastIndex` is stateful for `test`/`exec`.
const arabicLettersRegexp = new RegExp("[\u0621-\u064A]", "gu");
const arabicDiacriticsRegexp = new RegExp("[\u064B-\u0652]", "gu");
// Letters and diacritics together.
const arabicRegexp = new RegExp("[\u0621-\u0652]", "gu");

// Emoji matcher produced by the `emoji-regex/RGI_Emoji.js` factory.
const emojiRegexp = makeEmojiRegexp();

/**
 * The share of Arabic characters (letters and diacritics,
 * U+0621 - U+0652) in the `String`.
 *
 * Punctuation matched by `punctuationRegexp` and emoji are removed before
 * measuring. Everything else, including digits and whitespace, still counts
 * towards the total.
 * @param {String} text  Text to process
 * @returns {Number} ratio from `0.0` to `1.0`; `0.0` for empty input or
 *   when nothing is left after stripping
 */

export const howArabic = (text) => {
  if (!text) return 0.0;
  // strip punctuation and emoji so they do not dilute the ratio
  const stripped = text
    .replace(punctuationRegexp, "")
    .replace(emojiRegexp, "");
  // Count code points rather than UTF-16 units: the `u`-flag match below
  // counts code points, so the denominator has to use the same unit.
  const total = [...stripped].length;
  // Text made only of punctuation/emoji would otherwise give 0 / 0 = NaN.
  if (total === 0) return 0.0;
  const matches = stripped.match(arabicRegexp) || [];
  return matches.length / total;
};

/**
 * Is the `String` Arabic, i.e. is its `howArabic` ratio at least the
 * given `threshold` between `0` and `1`. Defaults to `0.79`.
 * @param {String} text  Text to process
 * @param {Number} [threshold=0.79]  Minimum ratio; used when omitted or `null`
 * @returns {Boolean}
 */

export const isArabic = (text, threshold) => {
  // `== null` covers both `undefined` and `null`. An explicit `0` is a valid
  // threshold and must not be replaced by the default.
  const limit = threshold == null ? 0.79 : threshold;
  return howArabic(text) >= limit;
};

/**
 * Does the `String` have _any_ Arabic letters (U+0621 - U+064A).
 * @param {String} text  Text to process
 * @returns {Boolean}
 */

export const hasArabic = (text) => {
  // The shared regexp is global, so `test` resumes from `lastIndex` left by
  // the previous successful call. Rewind it so every call scans from the
  // start and results do not depend on call history.
  arabicLettersRegexp.lastIndex = 0;
  return arabicLettersRegexp.test(text);
};

/**
 * Strip every Arabic tashkil mark (diacritics, U+064B - U+0652) from the
 * `String`, leaving all other characters untouched.
 * @param {String} text  Text to process
 * @returns {String} the text without diacritics
 */

export const removeTashkel = (text) => {
  const withoutDiacritics = text.replace(arabicDiacriticsRegexp, "");
  return withoutDiacritics;
};