summaryrefslogtreecommitdiff
path: root/src/app/utils/file_utils.js
diff options
context:
space:
mode:
authorJules Laplace <julescarbon@gmail.com>2021-10-17 02:52:05 +0200
committerJules Laplace <julescarbon@gmail.com>2021-10-17 02:52:05 +0200
commit06ecdf2af182034496e2123852deee4a58de1043 (patch)
treec8d4eb9664dd368bee5a4bf73dd1e02015ecaf39 /src/app/utils/file_utils.js
making a shoebox
Diffstat (limited to 'src/app/utils/file_utils.js')
-rw-r--r--src/app/utils/file_utils.js222
1 files changed, 222 insertions, 0 deletions
diff --git a/src/app/utils/file_utils.js b/src/app/utils/file_utils.js
new file mode 100644
index 0000000..39e8cd5
--- /dev/null
+++ b/src/app/utils/file_utils.js
@@ -0,0 +1,222 @@
+/**
+ * File system utilities.
+ * @module app/utils/file_utils
+ */
+
+import filesystem from "fs";
+import parseCSV from "csv-parse";
+import stringifyCSVCallback from "csv-stringify";
+import archiver from "archiver";
+import { streamToBuffer } from "@jorgeferrero/stream-to-buffer";
+
+import StreamArray from "stream-json/streamers/StreamArray";
+import { chain } from "stream-chain";
+import zlib from "zlib";
+
// Matches cells whose FIRST character could be interpreted as a formula
// trigger by spreadsheet software (-, =, +, @, tab \x09, CR \x0D) — the
// classic "CSV injection" vectors. sanitizeCSV prefixes matches with "'".
// eslint-disable-next-line no-control-regex
const SANITIZE_CSV_INITIALS = new RegExp("^[-=+@\\x09\\x0D]");
+
// Default options for csv-parse: treat the first row as column headers so
// each record resolves to an object keyed by column name.
const csvReadOptions = {
  columns: true,
};

/**
 * Helper function to load CSV from a file
 * @param {string} inputFile path to the file
 * @param {Object} [options] extra csv-parse options, merged over the defaults
 * @return {Promise} promise which will resolve with the parsed CSV
 */
export const loadCSV = (inputFile, options = {}) =>
  new Promise((resolve, reject) => {
    if (!filesystem.existsSync(inputFile)) {
      return reject("inputFile does not exist");
    }
    const csvOptions = {
      ...csvReadOptions,
      ...options,
    };
    filesystem.readFile(inputFile, "utf8", (error, text) => {
      if (error) {
        return reject(`Error reading file: ${error}`);
      }
      try {
        parseCSV(text, csvOptions, function (err, data) {
          if (err) {
            // BUG FIX: previously rejected with "Error parsing JSON"
            // (copy-pasted from loadJSON) even though this parses CSV.
            return reject("Error parsing CSV");
          }
          resolve(data);
        });
      } catch {
        reject("Error parsing CSV");
      }
    });
  });
+
/**
 * Promise adapter around the callback-based CSV stringifier.
 * @param {Array[]} rows array of arrays
 * @return {Promise} promise which will resolve with the stringified CSV
 */
export const stringifyCSV = (rows) =>
  new Promise((resolve, reject) => {
    stringifyCSVCallback(rows, (error, output) =>
      error ? reject(error) : resolve(output)
    );
  });
+
/**
 * Helper function to attempt to mitigate malicious CSV data ("CSV
 * injection"): any non-empty string cell whose first character is a
 * formula trigger (-, =, +, @, tab, CR) gets a leading apostrophe so
 * spreadsheets treat it as text.
 * @param {Array[]} rows array of arrays
 * @return {Array[]} the sanitized input
 */
export const sanitizeCSV = (rows) => {
  const sanitizeCell = (cell) => {
    const isRiskyString =
      typeof cell === "string" &&
      cell !== "" &&
      SANITIZE_CSV_INITIALS.test(cell);
    return isRiskyString ? "'" + cell : cell;
  };
  return rows.map((row) => row.map(sanitizeCell));
};
+
/**
 * Given a list of files containing CSV, return a binary ZIP
 * @param {array} files list of files
 * @param {array} files[].data CSV data (array of arrays)
 * @param {string} files[].filename filename corresponding to this CSV
 * @return {Promise<Buffer>} buffer containing the ZIP archive
 *   (FIX: previously documented as {string})
 */
export const zipCSVs = async (files) => {
  const archive = archiver("zip", {
    zlib: { level: 9 }, // Compression level. 0 = store uncompressed, 9 = most compression
  });

  await Promise.all(
    files.map(async (file) => {
      const csvString = await stringifyCSV(sanitizeCSV(file.data));
      archive.append(csvString, { name: file.filename });
    })
  );

  // Start consuming the archive stream BEFORE finalizing: with no reader
  // attached, a large archive can stall on backpressure inside finalize().
  const bufferPromise = streamToBuffer(archive);

  // finalize() returns a promise; await it so archiving errors surface here
  // instead of being silently dropped.
  await archive.finalize();

  return bufferPromise;
};
+
/**
 * Helper function to load JSON from a file
 * @param {string} inputFile path to the file
 * @return {Promise} promise which will resolve with the parsed JSON
 */
export const loadJSON = (inputFile) =>
  new Promise((resolve, reject) => {
    // Guard: fail fast when the path does not exist.
    if (!filesystem.existsSync(inputFile)) {
      reject("inputFile does not exist");
      return;
    }
    filesystem.readFile(inputFile, "utf8", (error, text) => {
      if (error) {
        reject(`Error reading file: ${error}`);
        return;
      }
      let parsed;
      try {
        parsed = JSON.parse(text);
      } catch {
        reject("Error parsing JSON");
        return;
      }
      resolve(parsed);
    });
  });
+
// Defaults for writeFile: overwrite an existing file unless told otherwise.
const writeFileOptions = {
  replace: true,
};

/**
 * Helper to write a string to a file
 * @param {string} outputFile the file to write to
 * @param {string|string[]} data the data to write (arrays are joined with newlines)
 * @param {Object} options options, by default will overwrite the existing file
 * @return {Promise} promise which will resolve when the file is saved
 */
export const writeFile = (outputFile, data, options = {}) => {
  const { replace } = { ...writeFileOptions, ...options };
  return new Promise((resolve, reject) => {
    if (!replace && filesystem.existsSync(outputFile)) {
      return reject("outputFile exists");
    }
    const text = Array.isArray(data) ? data.join("\n") : data;
    filesystem.writeFile(outputFile, text, { encoding: "utf8" }, (error) =>
      error ? reject(`Error writing file: ${error}`) : resolve()
    );
  });
};
+
// Defaults for writeJSON: same as writeFile, plus pretty-printing.
const writeJSONOptions = {
  ...writeFileOptions,
  indent: true,
};

/**
 * Helper to write JSON data to a file
 * @param {string} outputFile the file to write to
 * @param {Object} data the data to write
 * @param {Object} options options; by default overwrites the existing file
 *   and pretty-prints with a 2-space indent (pass indent: false to disable)
 * @return {Promise} promise which will resolve when the file is saved
 */
export const writeJSON = (outputFile, data, options = {}) => {
  const mergedOptions = { ...writeJSONOptions, ...options };
  let json;
  try {
    // FIX: the replacer argument must be a function, an array, or null —
    // the previous code passed `false`, which only worked by accident.
    json = mergedOptions.indent
      ? JSON.stringify(data, null, 2)
      : JSON.stringify(data);
  } catch {
    // e.g. circular references
    return Promise.reject("couldn't stringify JSON");
  }
  // writeFile already returns a promise — return it directly instead of
  // re-wrapping it in new Promise(...) (explicit-construction anti-pattern).
  return writeFile(outputFile, json, mergedOptions);
};
+
/**
 * Helper function to map over a JSON file, possibly gzipped
 * (gunzip is inserted into the pipeline when the filename ends in .gz)
 * @param {string} input the input file to be loaded
 * @param {function} mapper the mapper function, awaited as
 *   mapper(key, value, state) for each record in the top-level JSON array
 * @param {object} state the master state object, passed through to mapper
 * @return {Promise} resolves when the whole file has been consumed,
 *   rejects with the stream error on failure
 */
export async function mapJSONFile(input, mapper, state) {
  return new Promise((resolve, reject) => {
    const pipeline = chain(
      [filesystem.createReadStream(input)]
        .concat(input.match(/\.gz$/) ? [zlib.createGunzip()] : [])
        .concat([
          StreamArray.withParser(),
          async (record) => {
            await mapper(record.key, record.value, state);
          },
        ])
    );

    // Keep the stream flowing; records are consumed for side effects only.
    pipeline.on("data", () => {});

    pipeline.on("end", () => {
      console.log(`Loaded ${input} ${state.dryrun ? " (dryrun)" : ""}`);
      resolve();
    });

    pipeline.on("error", (error) => {
      console.error(error);
      // BUG FIX: previously called reject() with no argument, so callers
      // received undefined and could not inspect the failure.
      reject(error);
    });
  });
}