/**
 * Load the No.6092 datasheet into the OKCMS JSON
 */

import { loadJSON, loadCSV, writeJSON } from "./file_utils.js";
import { readdir } from "fs/promises";
import parseRTF from "rtf-parser";
import fs from "fs";
import sizeOf from "image-size";

const datasheetFile = "./data_store/tags.csv";
const dbFile = "./db.json";

// Column names in the CSV that act as tags. A tag's numeric ID is its
// position in this list plus one. These strings must match the CSV headers
// exactly, so they are intentionally left untouched.
const categories =
  "1620s,painting,blunt,National Gallery of Canada,Art Gallery of Ontario,Courtauld Institute,Stankievech,connsoeurship,double agent,forensics,black box".split(
    ","
  );

// Every record gets at least tag_0 .. tag_8; unused slots are written as 0.
const TAG_SLOT_COUNT = 9;

/**
 * Load the spreadsheet and RTF files in subdirectories, building the db.json
 *
 * Each CSV row becomes one page record (`page_<rowIndex>`). Row N (1-based)
 * is matched with the data_store folder whose numeric prefix is N, and that
 * folder's files are folded into the record by `loadFiles`.
 *
 * @returns {Promise<void>} resolves once db.json has been written
 */
async function main() {
  const data = await loadCSV(datasheetFile);
  // The database is rebuilt from scratch on every run. To merge into an
  // existing database instead, start from `await loadJSON(dbFile)` here.
  const db = {};
  const dataStore = await loadDataStoreIndex("./data_store");

  db.page = db.page || [];
  db.ui = db.ui || [];

  // index existing DB entries by ID in case we must merge again
  const pageById = db.page.reduce((lookup, page) => {
    const pageId = Number.parseInt(page.id.split("_")[1], 10);
    lookup[pageId] = page;
    return lookup;
  }, {});

  // loop over the CSV data :)
  let index = -1;
  for (const row of data) {
    index += 1;
    const record = pageById[index] || {
      __index: index,
      id: "page_" + index,
      title: row.Title,
      short_title: row.Title,
      type: "image",
    };

    // loop over the tags: a column marked "x" assigns that tag to the
    // next free tag_<n> slot
    let tagIndex = 0;
    categories.forEach((type, tagId) => {
      if (row[type] === "x") {
        record["tag_" + tagIndex] = tagId + 1;
        tagIndex += 1;
      }
    });

    // make sure all other tags are cleared out
    for (; tagIndex < TAG_SLOT_COUNT; tagIndex++) {
      record["tag_" + tagIndex] = 0;
    }

    // if there is a corresponding record in the data store, accumulate it
    const folderKey = String(index + 1);
    if (folderKey in dataStore) {
      await loadFiles(dataStore[folderKey], record);
      if (!record.images.length) {
        console.log(`/!\\ No images: ${dataStore[folderKey]} // ${row.Title}`);
      }
    }

    // if we haven't seen this ID before, append it
    if (!pageById[index]) {
      db.page.push(record);
    }
  }

  console.log("Done");
  await writeJSON(dbFile, db);
}

/**
 * Load the list of folders from the data_store
 *
 * Folder names are expected to start with a number followed by "-". The
 * number, with leading zeros removed, becomes the lookup key.
 *
 * @param {string} path directory to scan
 * @returns {Promise<Object<string, string>>} map of numeric prefix -> entry name
 */
async function loadDataStoreIndex(path) {
  const files = await readdir(path);
  const folders = {};
  for (const file of files) {
    // Plain substring tests: String.prototype.match with a string argument
    // builds a RegExp, in which "." would match any character.
    if (file.includes(".csv")) continue;
    if (file.includes(".DS_Store")) continue;
    const prefix = file.split("-")[0].trim();
    // Strip every leading zero (but keep a lone "0") so that "001" and "01"
    // both line up with the row number used as the key in main().
    const index = prefix.replace(/^0+(?=\d)/, "");
    folders[index] = file;
  }
  return folders;
}

/**
 * Load the files from a particular exhibit from their subdirectory in data_store
 *
 * Resets `record.images` and then classifies every file in the folder by
 * name: link RTFs, text RTFs, 3D objects, thumbnails, and (the fallback)
 * images. Files are handled in the order `readdir` returns them.
 *
 * @param {string} folder folder name inside data_store
 * @param {object} record page record to populate (mutated in place)
 * @returns {Promise<void>}
 */
async function loadFiles(folder, record) {
  const path = `data_store/${folder}/`;
  const files = await readdir(path);
  const images = (record.images = []);

  for (const file of files) {
    if (file.includes(".DS_Store")) continue;
    if (file.includes("skip-")) continue;

    const uri = `assets/data_store/${folder}/${file}`;

    // Extension checks use an escaped dot and are anchored to the end of the
    // name, so that e.g. "myobject.jpg" is not mistaken for an .obj file.
    if (/-URL\.rtf$/i.test(file)) {
      await loadLink(path + file, record);
    } else if (/\.rtf$/i.test(file)) {
      await loadText(path + file, record);
    } else if (/\.txt$/i.test(file)) {
      console.error("+ fix text file", path + file);
    } else if (/\.mtl$/i.test(file)) {
      // material files are not loaded
      continue;
    } else if (/\.obj$/i.test(file)) {
      console.error("+ load obj", path + file);
      record.type = "video";
      record.threeObject = {
        path,
        file,
      };
    } else if (/-thumb\.mp4$/i.test(file)) {
      record.thumbnail = {
        uri,
        type: "video",
        width: 320,
        height: 240,
      };
    } else if (/-thumb/i.test(file)) {
      record.thumbnail = {
        uri,
        caption: "",
        ...sizeOf(path + file),
      };
    } else {
      // Once the record has become a video (link or 3D object), remaining
      // files are not collected as images.
      if (record.type === "video") continue;
      images.push({
        uri,
        caption: "",
        ...sizeOf(path + file),
      });
    }
  }
}

/**
 * Load the text from an RTF
 *
 * Overwrites `record.author`, `record.title`, `record.citation` and
 * `record.description` from the parsed document.
 *
 * @param {string} path path of the RTF file
 * @param {object} record page record to populate (mutated in place)
 * @returns {Promise<void>} rejects if the RTF cannot be parsed or processed
 */
async function loadText(path, record) {
  return new Promise((resolve, reject) => {
    parseRTF.stream(fs.createReadStream(path), (err, doc) => {
      if (err) {
        reject(err);
        return;
      }
      try {
        extractText(doc, record);
        resolve();
      } catch (error) {
        // Surface processing failures through the promise instead of
        // throwing inside the parser callback.
        reject(error);
      }
    });
  });
}

/**
 * Copy author, title, citation and description out of a parsed RTF document.
 *
 * Paragraph 0 is skipped (number), paragraph 1 is the author, paragraph 2 the
 * title. Later paragraphs go to the citation until three empty paragraphs
 * have been seen, and to the description after that.
 *
 * NOTE(review): the HTML in the string literals below ("<br>", "<p>", "</p>")
 * was missing from the reviewed copy of this file and has been reconstructed
 * from the surrounding whitespace - confirm against the deployed output.
 */
function extractText(doc, record) {
  // Separate paragraphs from spans since this library doesn't handle
  // the last paragraph correctly.
  const paragraphs = doc.content.filter((para) => para.content);
  const finalParagraph = doc.content.filter((para) => !para.content);

  record.citation = "";
  record.description = "";
  record.author = "";
  record.title = "";

  let groupCount = 0;
  let content = "";

  paragraphs.forEach((para, paragraphIndex) => {
    const paragraph = [];
    para.content.forEach((clip) => {
      switch (paragraphIndex) {
        case 0: // number
          return;
        case 1: // author
          record.author += getClipValue(clip);
          return;
        case 2: // title
          record.title += getClipValue(clip);
          return;
        default:
          appendClip(paragraph, clip);
      }
    });

    if (paragraph.length) {
      if (groupCount < 3) {
        record.citation += paragraph.join("") + "<br>\n";
      } else {
        const text = paragraph.join("");
        content += "<p>\n" + text + "\n</p>\n\n";
      }
    }
    // empty paragraphs act as section separators
    if (!para.content.length) {
      groupCount += 1;
    }
  });

  record.title = record.title.replace(/<\/i>/g, "");

  // The last paragraph is just spans for some reason
  const finalParagraphExtract = [];
  finalParagraph.forEach((clip) => {
    appendClip(finalParagraphExtract, clip);
  });
  if (finalParagraphExtract.length) {
    content += "<p>\n" + finalParagraphExtract.join("") + "\n</p>\n\n";
  }

  record.description = content;
}

/**
 * Append a clip to a paragraph, adding formatting (i.e. italics)
 *
 * @param {string[]} paragraph accumulator of text fragments (mutated)
 * @param {object} clip span from the RTF parser
 */
function appendClip(paragraph, clip) {
  paragraph.push(getClipValue(clip));
}

/**
 * Return a clip's text wrapped in markup for its style. Only one style is
 * applied, with italic taking precedence over bold, and bold over underline.
 *
 * NOTE(review): the tags were missing from the reviewed copy of this file
 * (the wrappers were empty strings). "<i>" is implied by the "</i>" cleanup
 * applied to titles; "<b>" and "<u>" are assumed - confirm.
 *
 * @param {object} clip span from the RTF parser (`value`, `style`)
 * @returns {string}
 */
function getClipValue(clip) {
  let value = clip.value;
  if (clip.style.italic) {
    value = "<i>" + value + "</i>";
  } else if (clip.style.bold) {
    value = "<b>" + value + "</b>";
  } else if (clip.style.underline) {
    value = "<u>" + value + "</u>";
  }
  return value;
}

/**
 * Load the video link from those RTF files
 *
 * Marks the record as a video and replaces `record.images` with a single
 * video entry whose `uri` is the last text starting with "http" found in the
 * document.
 *
 * @param {string} path path of the "-URL.rtf" file
 * @param {object} record page record to populate (mutated in place)
 * @returns {Promise<void>} rejects if the RTF cannot be parsed or processed
 */
async function loadLink(path, record) {
  return new Promise((resolve, reject) => {
    parseRTF.stream(fs.createReadStream(path), (err, doc) => {
      if (err) {
        reject(err);
        return;
      }
      try {
        let uri;
        doc.content.forEach((para) => {
          // paragraphs carry their spans in `content` ...
          para.content?.forEach((clip) => {
            if (clip.value?.match(/^http/)) {
              uri = clip.value.trim();
            }
          });
          // ... while bare top-level spans carry the text directly
          if (para.value?.match(/^http/)) {
            uri = para.value.trim();
          }
        });

        if (!uri) {
          console.error("/!\\ No link found:", path);
        }
        console.log(uri);

        record.type = "video";
        record.images = [
          {
            type: "video",
            caption: "",
            uri,
          },
        ];
        resolve();
      } catch (error) {
        reject(error);
      }
    });
  });
}

/**
 * Load everything and then exit!
 */
main()
  .then(() => process.exit(0))
  .catch((err) => {
    // Report the failure and signal it through the exit status.
    console.error(err);
    process.exit(1);
  });