summaryrefslogtreecommitdiff
path: root/load_spreadsheet.js
diff options
context:
space:
mode:
author Jules Laplace <julescarbon@gmail.com> 2021-08-23 21:33:42 +0200
committer Jules Laplace <julescarbon@gmail.com> 2021-08-23 21:33:42 +0200
commit a05d52a7b13607181ce0443b17769bb02532dfc1 (patch)
tree f5c4722363f29a610d1a4f95efbd9351999083f6 /load_spreadsheet.js
parent 5ddde1cbb70bf4bc2df127fced5afb966069d299 (diff)
importing rtf
Diffstat (limited to 'load_spreadsheet.js')
-rw-r--r-- load_spreadsheet.js 193
1 files changed, 172 insertions, 21 deletions
diff --git a/load_spreadsheet.js b/load_spreadsheet.js
index 347c6eb..3bada97 100644
--- a/load_spreadsheet.js
+++ b/load_spreadsheet.js
@@ -3,30 +3,24 @@
*/
import { loadJSON, loadCSV, writeJSON } from "./file_utils.js";
+import { readdir } from "fs/promises";
+import parseRTF from "rtf-parser";
+import fs from "fs";
+import sizeOf from "image-size";
const datasheetFile = "./data_store/tags.csv";
const dbFile = "./db.json";
-var tagTypes = [
- "No6092",
- "1620s",
- "painting",
- "blunt",
- "National Gallery of Canada",
- "AGO",
- "courtauld",
- "intervensions",
- "connosieurship",
- "double agent",
- "forensics",
- "black box",
-];
+var tagTypes = "No6092,1620s,painting,blunt,National Gallery of Canada,AGO,courtauld,intervensions,connsoeurship,double agent,forensics,black box,Stankievech".split(
+ ","
+);
async function main() {
// basically this script exists to assign the X'd fields from the spreadsheet
// to the okcms json :)
const data = await loadCSV(datasheetFile);
const db = await loadJSON(dbFile);
+ const dataStore = await loadDataStoreIndex("./data_store");
db.page = db.page || [];
db.ui = db.ui || [];
@@ -39,30 +33,187 @@ async function main() {
}, {});
// loop over the CSV data :)
- data.forEach((row, index) => {
- const cell = pageById[index] || {
+ let index = -1;
+ for (let row of data) {
+ index += 1;
+ const record = pageById[index] || {
__index: index,
- id: "post_" + index,
+ id: "page_" + index,
title: row.Title,
};
// loop over the tags...
let tagIndex = 0;
tagTypes.forEach((type, tagId) => {
if (row[type] === "x") {
- cell["tag_" + tagIndex] = tagId + 1;
+ record["tag_" + tagIndex] = tagId + 1;
tagIndex += 1;
}
});
// make sure all other tags are cleared out
for (; tagIndex < 9; tagIndex++) {
- cell["tag_" + tagIndex] = 0;
+ record["tag_" + tagIndex] = 0;
}
+ // if there is a corresponding record in the data store, accumulate it
+ if (String(index + 1) in dataStore) {
+ await loadFiles(dataStore[index + 1], record);
+ }
+ // if we haven't seen this ID before, append it
if (!pageById[index]) {
- db.page.push(cell);
+ db.page.push(record);
}
- });
+ }
await writeJSON(dbFile, db);
}
+async function loadDataStoreIndex(path) {
+ const files = await readdir(path);
+ let parts, index;
+ let folders = {};
+ for (const file of files) {
+ if (file.match(".csv")) continue;
+ if (file.match(".DS_Store")) continue;
+ parts = file.split("-");
+ index = parts[0].trim().replace(/^0/, "");
+ folders[index] = file;
+ }
+ return folders;
+}
+
+async function loadFiles(folder, record) {
+ const path = `./data_store/${folder}/`;
+ const files = await readdir(path);
+ const images = (record.images = []);
+ let dimensions;
+ for (const file of files) {
+ if (file.match(".DS_Store")) continue;
+ if (file.match(/-URL.rtf/i)) {
+ await loadLink(path + file, record);
+ } else if (file.match(/.rtf/i)) {
+ await loadText(path + file, record);
+ } else if (file.match(/.txt/i)) {
+ console.error("+ fix text file", path + file);
+ } else if (file.match(/-thumb/i)) {
+ dimensions = sizeOf(path + file);
+ record.thumbnail = {
+ uri: `assets/data_store/${folder}/${file}`,
+ caption: "",
+ ...dimensions,
+ };
+ } else {
+ dimensions = sizeOf(path + file);
+ images.push({
+ uri: `assets/data_store/${folder}/${file}`,
+ caption: "",
+ ...dimensions,
+ });
+ }
+ }
+}
+
+async function loadText(path, record) {
+ return new Promise((resolve, reject) => {
+ parseRTF.stream(fs.createReadStream(path), (err, doc) => {
+ const paragraphs = doc.content.filter((para) => para.content);
+ const finalParagraph = doc.content.filter((para) => !para.content);
+ record.citation = "";
+ record.description = "";
+ let groupCount = 0;
+ let content = "";
+ record.author = "";
+ record.title = "";
+ paragraphs.forEach((para, paragraphIndex) => {
+ const paragraph = [];
+ para.content.forEach((clip) => {
+ switch (paragraphIndex) {
+ case 0: // number
+ // console.log(clip.value);
+ return;
+ case 1: // author
+ // console.log(clip.value);
+ record.author += getClipValue(clip);
+ return;
+ case 2: // title
+ // console.log(clip.value);
+ record.title += getClipValue(clip);
+ return;
+ default:
+ appendClip(paragraph, clip);
+ }
+ });
+ if (paragraph.length) {
+ if (groupCount < 3) {
+ record.citation += paragraph.join("") + "<br>\n";
+ } else {
+ content += "<p>\n" + paragraph.join("") + "\n</p>\n\n";
+ }
+ }
+ if (!para.content.length) {
+ groupCount += 1;
+ }
+ });
+ const finalParagraphExtract = [];
+ finalParagraph.forEach((clip) => {
+ appendClip(finalParagraphExtract, clip);
+ });
+ if (finalParagraphExtract.length) {
+ content += "<p>\n" + finalParagraphExtract.join("") + "\n</p>\n\n";
+ }
+
+ record.description = content;
+ resolve();
+ });
+ });
+}
+
+function appendClip(paragraph, clip) {
+ paragraph.push(getClipValue(clip));
+}
+function getClipValue(clip) {
+ if (clip.style.italic) {
+ return "<i>" + clip.value + "</i>";
+ } else if (clip.style.underline) {
+ return "<u>" + clip.value + "</u>";
+ } else {
+ return clip.value;
+ }
+}
+
+async function loadLink(path, record) {
+ return new Promise((resolve, reject) => {
+ parseRTF.stream(fs.createReadStream(path), (err, doc) => {
+ const paragraphs = doc.content;
+ let uri;
+ paragraphs.forEach((para, paragraphIndex) => {
+ const paragraph = [];
+ para.content?.forEach((clip) => {
+ if (clip.value.match(/^http/)) {
+ uri = clip.value.trim();
+ }
+ });
+ if (para.value?.match(/^http/)) {
+ uri = para.value.trim();
+ }
+ });
+ let match = uri.match(/\d+/);
+ let token = "";
+ if (match) {
+ token = match[0];
+ } else {
+ console.error("No token:", uri);
+ }
+ record.type = "video";
+ record.images = [
+ {
+ type: "video",
+ caption: "",
+ uri,
+ token,
+ },
+ ];
+ resolve();
+ });
+ });
+}
+
main().then(() => process.exit(0));