diff options
| author | Jules Laplace <julescarbon@gmail.com> | 2021-10-17 02:52:05 +0200 |
|---|---|---|
| committer | Jules Laplace <julescarbon@gmail.com> | 2021-10-17 02:52:05 +0200 |
| commit | 06ecdf2af182034496e2123852deee4a58de1043 (patch) | |
| tree | c8d4eb9664dd368bee5a4bf73dd1e02015ecaf39 /src/app/utils | |
making a shoebox
Diffstat (limited to 'src/app/utils')
| -rw-r--r-- | src/app/utils/data_utils.js | 34 | ||||
| -rw-r--r-- | src/app/utils/file_utils.js | 222 | ||||
| -rw-r--r-- | src/app/utils/math_utils.js | 94 | ||||
| -rw-r--r-- | src/app/utils/random_utils.js | 171 | ||||
| -rw-r--r-- | src/app/utils/set_utils.js | 81 | ||||
| -rw-r--r-- | src/app/utils/text_utils.js | 93 | ||||
| -rw-r--r-- | src/app/utils/unicode_utils.js | 63 |
7 files changed, 758 insertions, 0 deletions
// ===== src/app/utils/data_utils.js (new file, mode 100644, blob 327e594) =====

/**
 * Data manipulation utilities.
 * @module app/utils/data_utils
 */

/**
 * Convert an enum to a lookup.
 * @param {Object} dict dictionary: number -> string
 * @return {Object} dictionary: string -> number
 */
export const enumLookup = (dict) =>
  Object.fromEntries(
    // Object keys are always strings, so parse them back explicitly as base 10.
    Object.entries(dict).map(([key, label]) => [
      label,
      Number.parseInt(key, 10),
    ])
  );

/**
 * Divide an array into N-item chunks.
 *
 * Iterative, so long arrays cannot overflow the call stack. An empty input
 * yields a single empty chunk (`[[]]`).
 * @param {Array} arr array to be chunked
 * @param {number} size items per chunk (fractions are truncated, must be >= 1)
 * @return {Array} array of array chunks
 * @throws {RangeError} if `size` is not a number >= 1
 */
export const chunk = (arr, size) => {
  const step = Math.trunc(size);
  // Also catches NaN: a step below 1 could never make progress.
  if (!(step >= 1)) {
    throw new RangeError(`chunk size must be at least 1, got ${size}`);
  }
  if (arr.length === 0) {
    return [[]];
  }
  const chunks = [];
  for (let start = 0; start < arr.length; start += step) {
    chunks.push(arr.slice(start, start + step));
  }
  return chunks;
};

/**
 * Delay during async operation.
 * @param {number} ms time to delay, handed to `setTimeout` (default 1)
 * @return {Promise} promise that resolves after a delay
 */
export const delay = (ms = 1) =>
  new Promise((resolve) => setTimeout(resolve, ms));

// ===== src/app/utils/file_utils.js (new file, mode 100644, blob 39e8cd5) =====

/**
 * File system utilities.
 */
/**
 * @module app/utils/file_utils
 */

import filesystem from "fs";
import parseCSV from "csv-parse";
import stringifyCSVCallback from "csv-stringify";
import archiver from "archiver";
import { streamToBuffer } from "@jorgeferrero/stream-to-buffer";

import StreamArray from "stream-json/streamers/StreamArray";
import { chain } from "stream-chain";
import zlib from "zlib";

// Cells starting with "-", "=", "+", "@", tab or carriage return get escaped
// by sanitizeCSV.
// eslint-disable-next-line no-control-regex
const SANITIZE_CSV_INITIALS = new RegExp("^[-=+@\\x09\\x0D]");

// Default options handed to csv-parse; callers can override them per call.
const csvReadOptions = {
  columns: true,
};

/**
 * Helper function to load CSV from a file.
 * @param {string} inputFile path to the file
 * @param {Object} options csv-parse options, merged over the defaults
 * @return {Promise} promise which will resolve with the parsed CSV; it
 *   rejects with a string message on failure
 */
export const loadCSV = (inputFile, options = {}) =>
  new Promise((resolve, reject) => {
    if (!filesystem.existsSync(inputFile)) {
      return reject("inputFile does not exist");
    }
    const csvOptions = {
      ...csvReadOptions,
      ...options,
    };
    filesystem.readFile(inputFile, "utf8", (readError, text) => {
      if (readError) {
        return reject(`Error reading file: ${readError}`);
      }
      try {
        parseCSV(text, csvOptions, (parseError, records) => {
          if (parseError) {
            return reject("Error parsing CSV");
          }
          resolve(records);
        });
      } catch {
        // Covers a parser that throws synchronously instead of calling back.
        reject("Error parsing CSV");
      }
    });
  });

/**
 * Helper function to stringify an array-of-arrays to a CSV (with Promise interface).
 * @param {Array[]} rows array of arrays
 * @return {Promise} promise which will resolve with the stringified CSV
 */
export const stringifyCSV = (rows) =>
  new Promise((resolve, reject) => {
    stringifyCSVCallback(rows, (error, output) => {
      if (error) {
        reject(error);
      } else {
        resolve(output);
      }
    });
  });

/**
 * Helper function to attempt to mitigate malicious CSV data.
 * String cells matching SANITIZE_CSV_INITIALS are prefixed with a single
 * quote; every other cell is passed through untouched.
 * @param {Array[]} rows array of arrays
 * @return {Array[]} the sanitized input (new arrays, input is not mutated)
 */
export const sanitizeCSV = (rows) =>
  rows.map((row) =>
    row.map((cell) =>
      typeof cell === "string" && SANITIZE_CSV_INITIALS.test(cell)
        ? `'${cell}`
        : cell
    )
  );

/**
 * Given a list of files containing CSV, return a binary ZIP.
 * @param {array} files list of files
 * @param {array} files[].data CSV data (array of arrays)
 * @param {array} files[].filename filename corresponding to this CSV
 * @return {Promise} promise resolving with whatever `streamToBuffer` yields
 *   for the archive stream (the output ZIP file)
 */
export const zipCSVs = async (files) => {
  const archive = archiver("zip", {
    zlib: { level: 9 }, // Compression level. 0 = store uncompressed, 9 = most compression
  });

  // Stringify in parallel, then append in input order so the entry order in
  // the archive does not depend on which stringify call finishes first.
  const csvStrings = await Promise.all(
    files.map((file) => stringifyCSV(sanitizeCSV(file.data)))
  );
  csvStrings.forEach((csvString, index) => {
    archive.append(csvString, { name: files[index].filename });
  });

  // Await finalize() together with the buffering so that a failure while
  // finalizing rejects this function instead of going unhandled.
  const finalized = archive.finalize();
  const [zipped] = await Promise.all([streamToBuffer(archive), finalized]);
  return zipped;
};

/**
 * Helper function to load JSON from a file.
 * @param {string} inputFile path to the file
 * @return {Promise} promise which will resolve with the parsed JSON; it
 *   rejects with a string message on failure
 */
export const loadJSON = (inputFile) =>
  new Promise((resolve, reject) => {
    if (!filesystem.existsSync(inputFile)) {
      return reject("inputFile does not exist");
    }
    filesystem.readFile(inputFile, "utf8", (error, text) => {
      if (error) {
        return reject(`Error reading file: ${error}`);
      }
      try {
        resolve(JSON.parse(text));
      } catch {
        reject("Error parsing JSON");
      }
    });
  });

const writeFileOptions = {
  replace: true,
};

/**
 * Helper to write a string to a file.
 * @param {string} outputFile the file to write to
 * @param {string|string[]} data the data to write (arrays are joined with newlines)
 * @param {Object} options options, by default will overwrite the existing file
 * @return {Promise} promise which will resolve when the file is saved; it
 *   rejects with a string message on failure
 */
export const writeFile = (outputFile, data, options = {}) => {
  const { replace } = { ...writeFileOptions, ...options };
  return new Promise((resolve, reject) => {
    if (!replace && filesystem.existsSync(outputFile)) {
      return reject("outputFile exists");
    }
    const text = Array.isArray(data) ? data.join("\n") : data;
    // "wx" fails if the file appeared after the check above, so a concurrent
    // writer can never be silently overwritten when replace is off.
    const writeOptions = { encoding: "utf8", flag: replace ? "w" : "wx" };
    filesystem.writeFile(outputFile, text, writeOptions, (error) => {
      if (error) {
        if (!replace && error.code === "EEXIST") {
          return reject("outputFile exists");
        }
        return reject(`Error writing file: ${error}`);
      }
      resolve();
    });
  });
};

const writeJSONOptions = {
  ...writeFileOptions,
  indent: true,
};

/**
 * Helper to write JSON data to a file.
 * @param {string} outputFile the file to write to
 * @param {Object} data the data to write
 * @param {Object} options options, by default will overwrite the existing
 *   file and indent the JSON by two spaces
 * @return {Promise} promise which will resolve when the file is saved
 */
export const writeJSON = (outputFile, data, options = {}) => {
  const merged = { ...writeJSONOptions, ...options };
  let json;
  try {
    json = merged.indent ? JSON.stringify(data, null, 2) : JSON.stringify(data);
  } catch {
    return Promise.reject("couldn't stringify JSON");
  }
  return writeFile(outputFile, json, merged);
};

/**
 * Helper function to map over a JSON file, possibly gzipped.
 * The file is streamed through `StreamArray`, and `mapper` is awaited with
 * `(key, value, state)` for each record.
 * @param {string} input the input file to be loaded (gunzipped if it ends in ".gz")
 * @param {function} mapper the mapper function
 * @param {object} state the master state object
 * @return {Promise} resolves on the pipeline's "end" event, rejects with the
 *   error from its "error" event
 */
export function mapJSONFile(input, mapper, state) {
  return new Promise((resolve, reject) => {
    const stages = [filesystem.createReadStream(input)];
    if (input.match(/\.gz$/)) {
      stages.push(zlib.createGunzip());
    }
    stages.push(StreamArray.withParser(), async (record) => {
      await mapper(record.key, record.value, state);
    });
    const pipeline = chain(stages);

    // No-op "data" listener, kept from the original.
    // NOTE(review): presumably there to keep the stream flowing - confirm.
    pipeline.on("data", () => {});

    pipeline.on("end", () => {
      console.log(`Loaded ${input} ${state?.dryrun ? " (dryrun)" : ""}`);
      resolve();
    });

    pipeline.on("error", (error) => {
      console.error(error);
      reject(error);
    });
  });
}

// ===== src/app/utils/math_utils.js (new file, mode 100644, blob f5482d3) =====

/**
 * Mathematical utilities.
 * @module app/utils/math_utils
 */

/**
 * RegExp to check if a string contains only hexadecimal values.
 * The string should only contain the characters [0-9] or [a-f], case-insensitive.
 * @type {RegExp}
 */
const HEXADECIMAL_REGEXP = new RegExp("^[a-f0-9]+$", "i");

/**
 * Check if a string is a valid hexadecimal number.
 * @param {String} text the string to check
 * @return {Boolean} true if the string is a valid hexadecimal number; false
 *   for the empty string and for anything that is not a string
 */
export const isHexadecimal = (text) =>
  typeof text === "string" && HEXADECIMAL_REGEXP.test(text);

/**
 * Constrain a number between an upper or lower bound.
 * @param {Number} value the value to clamp
 * @param {Number} low the lower bound
 * @param {Number} high the upper bound
 * @return {Number} the clamped value
 */
export const clamp = (value, low = 0, high = 1) => {
  if (value < low) {
    return low;
  }
  return value < high ? value : high;
};

/**
 * Implementation of mod that supports negative numbers (unlike JavaScript % operator).
 * @param {Number} numerator the modulo numerator
 * @param {Number} denominator the modulo denominator
 * @return {Number} `numerator mod denominator`
 */
export const mod = (numerator, denominator) =>
  numerator - denominator * Math.floor(numerator / denominator);

/**
 * Get the mean of a list of numbers, ignoring falsy entries.
 * Note that this skips zeros as well as null/undefined/NaN, so `[0, 10]`
 * averages to 10.
 * @param {Array} numbers list of numbers
 * @return {number} arithmetic mean of the truthy entries, or 0 if there are none
 */
export const arrayMean = (numbers) => {
  const usable = (numbers || []).filter((number) => !!number);
  if (!usable.length) {
    return 0;
  }
  const sum = usable.reduce((total, number) => total + number, 0);
  return sum / usable.length;
};

/**
 * Find the (planar) centroid of a set of points.
 * @param {Array} items list of location-like items having { lat, lng }
 * @return {Object} object with averaged lat/lng (see arrayMean for how
 *   missing and zero coordinates are treated)
 */
export const centroid = (items) => ({
  lat: arrayMean(items.map((item) => item.lat)),
  lng: arrayMean(items.map((item) => item.lng)),
});

/**
 * Returns a gaussian (normal) random function with the given mean and stdev.
 */
/**
 * Build a generator of normally distributed numbers.
 *
 * Each pass of the sampling loop produces two values; the second is cached
 * and returned by the next call.
 * @param {Number} mean center value
 * @param {Number} stdev standard deviation (radius around mean)
 * @param {Function} [random=Math.random] uniform source; assumed to return
 *   values in [0, 1) like Math.random
 * @return {Function} function generating numbers with a normal distribution
 */
export function gaussian(mean, stdev, random = Math.random) {
  let spare;
  let hasSpare = false;
  return () => {
    let y1;
    if (hasSpare) {
      y1 = spare;
      hasSpare = false;
    } else {
      let x1;
      let x2;
      let w;
      do {
        x1 = 2.0 * random() - 1.0;
        x2 = 2.0 * random() - 1.0;
        w = x1 * x1 + x2 * x2;
        // w === 0 must be rejected too: log(0) / 0 below would produce NaN.
      } while (w >= 1.0 || w === 0);
      const scale = Math.sqrt((-2.0 * Math.log(w)) / w);
      y1 = x1 * scale;
      spare = x2 * scale;
      hasSpare = true;
    }

    return mean + stdev * y1;
  };
}

// ===== src/app/utils/random_utils.js (new file, mode 100644, blob fc3c0f7) =====

/**
 * Test data generation utilities.
 * @module app/utils/random_utils
 */

import { LoremIpsum } from "lorem-ipsum";
import seedrandom from "seedrandom";

/** Random float in [-scale, scale), drawn from Math.random. */
export const rand = (scale) => scale * (Math.random() * 2 - 1);

/** Random integer: the floor of a Math.random draw scaled by `scale`. */
export const randint = (scale) => Math.floor(scale * Math.random());

/** Randomly -1 or 1, drawn from Math.random. */
export const randsign = () => (Math.random() < 0.5 ? -1 : 1);

/** Generator created by seedrandom with the fixed seed "shoebox". */
export const rand_seq = seedrandom("shoebox");

/**
 * Squared draw from `rand_seq`, halved and given a random sign.
 * NOTE(review): the sign comes from randsign(), i.e. Math.random, so the
 * result is not fully determined by the seed - confirm this is intended.
 */
export const rand_seq_norm = () => Math.pow(rand_seq(), 2) * randsign() * 0.5;

/** Pick a random element of `list` (undefined for an empty list). */
export const choice = (list) => list[randint(list.length)];

/** Pick a random entry of `fruits`. */
export const randomFruit = () => choice(fruits);

/** Lorem-ipsum generator: 2-4 sentences per paragraph, 4-10 words per sentence. */
export const lorem = new LoremIpsum({
  sentencesPerParagraph: {
    max: 4,
    min: 2,
  },
  wordsPerSentence: {
    max: 10,
    min: 4,
  },
});

/** Word list used by randomFruit, one entry per line of the literal below. */
export const fruits = `agave
allspice
almond
apple
apricot
artichoke
arugula
asparagus
avocado
balsamic
banana
basil
blackbean
blueberry
brandy
broccoli
buckwheat
cacao
cachaca
cantaloupe
caper
caramel
caraway
cardamom
carrot
cashew
cauliflower
cava
cayenne
celery
chambord
champagne
chickpea
chili
chipotle
chive
cilantro
cinnamon
citrus
clementine
clove
coconut
coffee
cornbread
cucumber
cumin
daikon
dill
eggplant
espresso
fennel
fenugreek
flower
garlic
ghee
ginger
grape
grapefruit
greentea
habanero
hazelnut
hibiscus
honey
honeydew
horseradish
huckleberry
jalapeno
jasmine
juniper
kiwi
kohlrabi
kumquat
lavender
leek
lemon
lemongrass
macadamia
mango
maple
marjoram
mint
miso
nori
nutmeg
olive
onion
orange
oregano
papaya
parmesan
parsley
parsnip
peach
peanut
pear
pecan
pepper
pernod
pinenut
pineapple
plum
pluot
poblano
pomegranate
pomelo
potato
pumpkin
quinoa
raisin
raspberry
rhubarb
rosemary
saffron
sage
sake
sauerkraut
savory
seitan
sesame
shallot
paprika
soy
spinach
squash
strawberry
tempei
tequila
thyme
tofu
tomatillo
tomato
turmeric
vanilla
vermouth
violet
walnut
wasabi
watermelon
wheat
yam
yuzu
zucchini`
  .split("\n")
  .map((text) => text.trim());

// ===== src/app/utils/set_utils.js (new file, mode 100644, blob 88e8fea) =====

/**
 * Operations on sets.
 */
/**
 * @module app/utils/set_utils
 */

// Use the argument directly when it already answers `has()` (Set, Map, ...),
// otherwise build a Set from it so plain iterables such as arrays work too.
const asLookup = (collection) =>
  typeof collection?.has === "function" ? collection : new Set(collection);

/**
 * Determine if `set` contains `subset`.
 * @param {Set} set the superset
 * @param {Set} subset the subset
 * @return {Boolean} true if set contains subset
 */
export function isSuperset(set, subset) {
  const lookup = asLookup(set);
  for (const elem of subset) {
    if (!lookup.has(elem)) {
      return false;
    }
  }
  return true;
}

/**
 * Return the union (A or B) of two sets.
 * @param {Set} setA a set
 * @param {Set} setB a set
 * @return {Set} a new set holding the union of the sets
 */
export function union(setA, setB) {
  const result = new Set(setA);
  for (const elem of setB) {
    result.add(elem);
  }
  return result;
}

/**
 * Return the intersection (A and B) of two sets.
 * @param {Set} setA a set
 * @param {Set} setB a set
 * @return {Set} a new set holding the intersection, in setB's iteration order
 */
export function intersection(setA, setB) {
  const lookup = asLookup(setA);
  const result = new Set();
  for (const elem of setB) {
    if (lookup.has(elem)) {
      result.add(elem);
    }
  }
  return result;
}

/**
 * Return the symmetric difference (A xor B) of two sets.
 * @param {Set} setA a set
 * @param {Set} setB a set
 * @return {Set} a new set holding the symmetric difference of the sets
 */
export function symmetricDifference(setA, setB) {
  const result = new Set(setA);
  // Deduplicate first: a repeated item in a non-Set iterable would otherwise
  // be toggled in and straight back out again.
  for (const elem of new Set(setB)) {
    if (result.has(elem)) {
      result.delete(elem);
    } else {
      result.add(elem);
    }
  }
  return result;
}

/**
 * Return the difference (A not B) of two sets.
 * @param {Set} setA a set
 * @param {Set} setB a set
 * @return {Set} a new set holding the elements of setA that are not in setB
 */
export function difference(setA, setB) {
  const result = new Set(setA);
  for (const elem of setB) {
    result.delete(elem);
  }
  return result;
}

// ===== src/app/utils/text_utils.js (new file, mode 100644, blob 1f987ec) =====

/**
 * Text and sorting utilities.
 */
/**
 * @module app/utils/text_utils
 */

/**
 * Remove HTML-unfriendly characters from a string.
 * @param {string} text string to sanitize (any falsy value becomes "")
 * @return {string} the string without `<`, `>` and `&`
 */
export const sanitize = (text) => String(text || "").replace(/[<>&]/g, "");

/**
 * Remove non-alphanumerics from a string.
 * @param {string} text string to sanitize (any falsy value becomes "")
 * @return {string} the string reduced to ASCII letters, digits, `-` and `_`
 */
export const sanitizeName = (text) =>
  String(text || "").replace(/[^-_a-zA-Z0-9]/g, "");

/**
 * Remove HTML from a string.
 * @param {string} text string to sanitize (any falsy value becomes "")
 * @return {string} the string with every `<...>` tag removed
 */
export const stripHTML = (text) => String(text || "").replace(/<[^>]+>/g, "");

/**
 * Replace HTML special characters with entities.
 * @param {string} text string to sanitize (any falsy value becomes "")
 * @return {string} the string with `&`, `<` and `>` escaped
 */
export const sanitizeHTML = (text) =>
  String(text || "")
    // "&" must go first so the entities produced below are not escaped again.
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;");

/**
 * Capitalize a single word.
 * @param {string} text string to capitalize (any falsy value becomes "")
 * @return {string} the capitalized word
 */
export const capitalizeWord = (text) => {
  const word = String(text || "");
  return word.charAt(0).toUpperCase() + word.slice(1);
};

/**
 * Capitalize all words in a string.
 * @param {string} text string to capitalize (any falsy value becomes "")
 * @return {string} the capitalized string
 */
export const capitalize = (text) =>
  String(text || "")
    .split(" ")
    .map((word) => capitalizeWord(word))
    .join(" ");

/**
 * Convert a phrase to a slug.
 * Lowercases the text and turns every run of characters outside
 * `[-_a-z0-9]` into a single hyphen. Leading and trailing hyphens are kept.
 * @param {string} text string to slugify (any falsy value becomes "")
 * @return {string} the slugified string
 */
export const slugify = (text) =>
  String(text || "")
    .toLowerCase()
    // Whitespace is outside the allowed set, so it needs no separate pass.
    .replace(/[^-_a-z0-9]+/g, "-")
    .replace(/-+/g, "-");

/**
 * Sort function, sort strings ascending (a -> z).
 * Compares index 0 of each argument, so it sorts arrays (or strings) by
 * their first entry.
 */
// eslint-disable-next-line id-length
export const sortString = (a, b) => a[0].localeCompare(b[0]);

/**
 * Sort function, sort strings descending (z -> a)
 */
/**
 * Sort function, sort strings descending (z -> a).
 * Compares index 0 of each argument.
 */
// eslint-disable-next-line id-length
export const sortStringDescending = (a, b) => b[0].localeCompare(a[0]);

/**
 * Sort function, sort numbers ascending (0 -> 1).
 * Compares index 1 of each argument.
 */
// eslint-disable-next-line id-length
export const sortNumeric = (a, b) => a[1] - b[1];

/**
 * Sort function, sort numbers descending (1 -> 0).
 * Compares index 1 of each argument.
 */
// eslint-disable-next-line id-length
export const sortNumericDescending = (a, b) => b[1] - a[1];

// ===== src/app/utils/unicode_utils.js (new file, mode 100644, blob c6a2253) =====

/**
 * Functions for dealing with Arabic text.
 * Based on https://github.com/ahmads/arabicString/
 * For Farsi, it is sufficient to use the `persianRex` NPM module
 * @module utils/ar_utils
 */

import makeEmojiRegexp from "emoji-regex/RGI_Emoji.js";

const rtlPunctuation = "،|؟|«|»|؛|٬";
const ltrPunctuation = "\\.|:|!|-|\\[|\\]|\\(|\\)|\\\\|/";

const punctuationRegexp = new RegExp(
  `(${rtlPunctuation}|${ltrPunctuation})`,
  "gu"
);
// U+0621-U+0652: Arabic letters plus diacritics.
const arabicRegexp = new RegExp("[\u0621-\u0652]", "gu");
// U+0621-U+064A: letters only. Deliberately NOT global: this pattern is only
// used for a yes/no check, and a global regex keeps `lastIndex` between
// `.test()` calls, which makes repeated checks give alternating answers.
const arabicLettersRegexp = new RegExp("[\u0621-\u064A]", "u");
// U+064B-U+0652: diacritics (tashkil); global so replace() strips them all.
const arabicDiacriticsRegexp = new RegExp("[\u064B-\u0652]", "gu");
const emojiRegexp = makeEmojiRegexp();

/**
 * The percentage of Arabic letters in the `String`.
 * @param {String} text Text to process
 * @returns {Float} percentage from `0.0` - `1.0`; `0.0` for empty input or
 *   when nothing is left after stripping
 */
export const howArabic = (text) => {
  if (!text) {
    return 0.0;
  }
  // Strip punctuation and emoji before measuring.
  // NOTE(review): the original comment also promised digits and spaces, but
  // neither pattern removes them - confirm which behaviour is wanted.
  const stripped = text.replace(punctuationRegexp, "").replace(emojiRegexp, "");
  if (stripped.length === 0) {
    // Avoid 0 / 0 = NaN for punctuation-only or emoji-only text.
    return 0.0;
  }
  const arabicChars = stripped.match(arabicRegexp) || [];
  return arabicChars.length / stripped.length;
};

/**
 * Is the `String` Arabic, based on
 * a given `threshold` between `0` and `1`. Defaults to `0.79`.
 */
/**
 * Decide whether `text` counts as Arabic: its `howArabic` score must reach
 * `threshold`.
 * @param {string} text Text to process
 * @param {Float} [threshold=0.79] any falsy value, including `0`, falls back
 *   to the default
 * @returns {Boolean}
 */
export const isArabic = (text, threshold) =>
  howArabic(text) >= (threshold || 0.79);

/**
 * Does the `String` have _any_ Arabic letters.
 * @param {String} text Text to process
 * @returns {Boolean}
 */
export const hasArabic = (text) =>
  // search() ignores the pattern's `g` flag and `lastIndex`, so repeated
  // calls always scan from the start of the text; `.test()` on a global
  // regex would resume where the previous call stopped.
  String(text).search(arabicLettersRegexp) !== -1;

/**
 * Remove the Arabic tashkil -diacritics- from the 'String'.
 * @param {String} text Text to process
 * @returns {String}
 */
export const removeTashkel = (text) => text.replace(arabicDiacriticsRegexp, "");
