/**
* Load the No.6092 datasheet into the OKCMS JSON
*/
import { loadJSON, loadCSV, writeJSON } from "./file_utils.js";
import { readdir } from "fs/promises";
import parseRTF from "rtf-parser";
import fs from "fs";
import sizeOf from "image-size";
// Input spreadsheet and output database, both relative to the working directory.
const datasheetFile = "./data_store/tags.csv";
const dbFile = "./db.json";
// Spreadsheet columns that main() checks for an "x" on every row. A tag's
// number is its 1-based position in this list, so the order matters, and the
// spellings are lookup keys that must be kept exactly as written here.
const categories = [
  "1620s",
  "painting",
  "blunt",
  "National Gallery of Canada",
  "Art Gallery of Ontario",
  "Courtauld Institute",
  "Stankievech",
  "connsoeurship",
  "double agent",
  "forensics",
  "black box",
];
/**
 * Rebuild db.json: one page record per spreadsheet row, tagged from the
 * category columns and filled in from the matching data_store folder.
 */
async function main() {
  const rows = await loadCSV(datasheetFile);
  // const db = await loadJSON(dbFile);
  const db = {};
  const folderByNumber = await loadDataStoreIndex("./data_store");
  db.page = db.page || [];
  db.ui = db.ui || [];

  // Look-up of pages already in the DB, keyed by the number after "page_",
  // so a later merge can reuse them instead of appending duplicates.
  const knownPages = {};
  for (const page of db.page) {
    knownPages[parseInt(page.id.split("_")[1])] = page;
  }

  let index = 0;
  for (const row of rows) {
    const existing = knownPages[index];
    const record = existing || {
      __index: index,
      id: `page_${index}`,
      title: row.Title,
      short_title: row.Title,
      type: "image",
    };

    // Collect the 1-based numbers of every category marked "x" on this row.
    const tagNumbers = [];
    for (const [position, column] of categories.entries()) {
      if (row[column] === "x") {
        tagNumbers.push(position + 1);
      }
    }
    // Write them into consecutive tag_N slots, zeroing unused slots below 9.
    const slotCount = Math.max(tagNumbers.length, 9);
    for (let slot = 0; slot < slotCount; slot++) {
      record[`tag_${slot}`] = slot < tagNumbers.length ? tagNumbers[slot] : 0;
    }

    // Folders are numbered from 1 while rows are counted from 0.
    const folderKey = String(index + 1);
    if (folderKey in folderByNumber) {
      const folder = folderByNumber[folderKey];
      await loadFiles(folder, record);
      if (record.images.length === 0) {
        console.log(`/!\\ No images: ${folder} // ${row.Title}`);
      }
    }

    // Only records created in this run need to be appended.
    if (!existing) {
      db.page.push(record);
    }
    index += 1;
  }

  console.log("Done");
  await writeJSON(dbFile, db);
}
/**
 * Build an index of the entries found in the data store directory.
 *
 * Each entry is keyed by the text before its first "-", trimmed and with its
 * leading zeros removed, so "01 - Foo" is stored under "1". Entries whose
 * name contains ".csv" or ".DS_Store" are left out.
 *
 * @param {string} path - Directory to scan.
 * @returns {Promise<Object<string, string>>} Entry names keyed by index.
 */
async function loadDataStoreIndex(path) {
  const files = await readdir(path);
  const folders = {};
  for (const file of files) {
    // includes() compares literally; String.prototype.match() would compile
    // the string into a RegExp and treat the "." as "any character".
    if (file.includes(".csv") || file.includes(".DS_Store")) continue;
    const [prefix] = file.split("-");
    // Drop every leading zero so "007" yields "7", the form main() looks up.
    const index = prefix.trim().replace(/^0+/, "");
    folders[index] = file;
  }
  return folders;
}
/**
 * Read one exhibit folder under data_store/ and attach its contents to the
 * record. Entries are handled in the order readdir() returns them:
 *
 * - names containing ".DS_Store" or "skip-" are ignored;
 * - "*-URL.rtf" is handed to loadLink(), any other "*.rtf" to loadText();
 * - "*.txt" is only reported, "*.mtl" is ignored;
 * - "*.obj" marks the record as type "video" and stores `threeObject`;
 * - "*-thumb.mp4" becomes a fixed-size 320x240 video thumbnail;
 * - any other name containing "-thumb" becomes an image thumbnail;
 * - everything else is measured and appended to `record.images`, unless the
 *   record has already been marked as "video".
 *
 * @param {string} folder - Folder name inside data_store/.
 * @param {object} record - Page record to fill in; `images` is reset first.
 * @returns {Promise<void>}
 */
async function loadFiles(folder, record) {
  const path = `data_store/${folder}/`;
  const files = await readdir(path);
  const images = (record.images = []);
  for (const file of files) {
    if (file.includes(".DS_Store") || file.includes("skip-")) continue;
    const source = path + file;
    const uri = `assets/data_store/${folder}/${file}`;
    // Extensions are matched with an escaped dot and anchored at the end of
    // the name, so e.g. "my-objective-thumb.mp4" is not mistaken for an OBJ.
    if (/-URL\.rtf$/i.test(file)) {
      await loadLink(source, record);
    } else if (/\.rtf$/i.test(file)) {
      await loadText(source, record);
    } else if (/\.txt$/i.test(file)) {
      console.error("+ fix text file", source);
    } else if (/\.mtl$/i.test(file)) {
      // console.error("+ load mtl", path + file);
      continue;
    } else if (/\.obj$/i.test(file)) {
      console.error("+ load obj", source);
      record.type = "video";
      record.threeObject = {
        path,
        file,
      };
    } else if (/-thumb\.mp4$/i.test(file)) {
      record.thumbnail = {
        uri,
        type: "video",
        width: 320,
        height: 240,
      };
    } else if (/-thumb/i.test(file)) {
      record.thumbnail = {
        uri,
        caption: "",
        ...sizeOf(source),
      };
    } else if (record.type !== "video") {
      images.push({
        uri,
        caption: "",
        ...sizeOf(source),
      });
    }
  }
}
// NOTE(review): this part of the file arrived with its markup literals
// stripped (empty strings around clip values, string literals broken across
// lines). The tags below -- <i>, <b>, <u>, <p>, <br> -- are reconstructed from
// context (the title clean-up still looks for "</i>"); confirm them against
// the markup the site expects.

/**
 * Parse an RTF file with rtf-parser.
 *
 * @param {string} path - RTF file to read.
 * @returns {Promise<object>} Resolves with the parsed document and rejects
 *   with the error reported by the parser.
 */
function parseRTFFile(path) {
  return new Promise((resolve, reject) => {
    parseRTF.stream(fs.createReadStream(path), (err, doc) => {
      if (err) {
        reject(err);
        return;
      }
      resolve(doc);
    });
  });
}

/**
 * Load the text from an RTF and store it on the record as `author`, `title`,
 * `citation` and `description` (all four are reset to "" first).
 *
 * Paragraph 0 (the number) is ignored, paragraph 1 is the author and
 * paragraph 2 the title. Later paragraphs go to the citation until three
 * empty paragraphs have been seen, and to the description after that.
 *
 * @param {string} path - RTF file to read.
 * @param {object} record - Page record to fill in.
 * @returns {Promise<void>}
 */
async function loadText(path, record) {
  const doc = await parseRTFFile(path);

  // Separate paragraphs from spans since this library doesn't handle
  // the last paragraph correctly.
  const paragraphs = doc.content.filter((para) => para.content);
  const finalParagraph = doc.content.filter((para) => !para.content);

  record.citation = "";
  record.description = "";
  let groupCount = 0;
  let content = "";
  record.author = "";
  record.title = "";

  paragraphs.forEach((para, paragraphIndex) => {
    const paragraph = [];
    for (const clip of para.content) {
      if (paragraphIndex === 0) {
        // number: not stored
      } else if (paragraphIndex === 1) {
        record.author += getClipValue(clip);
      } else if (paragraphIndex === 2) {
        record.title += getClipValue(clip);
      } else {
        appendClip(paragraph, clip);
      }
    }
    if (paragraph.length) {
      const text = paragraph.join("");
      if (groupCount < 3) {
        record.citation += `${text}<br>\n`;
      } else {
        content += `<p>\n${text}\n</p>\n\n`;
      }
    }
    // Empty paragraphs separate the groups of the document.
    if (!para.content.length) {
      groupCount += 1;
    }
  });

  // NOTE(review): only the closing tag is removed, exactly as the pattern
  // reads in the source; it may have lost an adjacent "<i>" -- confirm.
  record.title = record.title.replace(/<\/i>/g, "");

  // The last paragraph is just spans for some reason
  const finalParagraphExtract = [];
  finalParagraph.forEach((clip) => {
    appendClip(finalParagraphExtract, clip);
  });
  if (finalParagraphExtract.length) {
    content += `<p>\n${finalParagraphExtract.join("")}\n</p>\n\n`;
  }

  record.description = content;
}

/**
 * Append a clip to a paragraph, adding formatting (i.e. italics).
 *
 * @param {string[]} paragraph - Pieces of the paragraph collected so far.
 * @param {object} clip - Span from the parsed RTF document.
 */
function appendClip(paragraph, clip) {
  paragraph.push(getClipValue(clip));
}

/**
 * Return the text of a clip, wrapped in a tag for the first style flag that
 * is set: italic, then bold, then underline. Only one tag is ever applied.
 *
 * @param {object} clip - Span with `value` and `style` properties.
 * @returns {string} The clip text, with markup when it is styled.
 */
function getClipValue(clip) {
  const { value, style } = clip;
  if (style.italic) {
    return `<i>${value}</i>`;
  }
  if (style.bold) {
    return `<b>${value}</b>`;
  }
  if (style.underline) {
    return `<u>${value}</u>`;
  }
  return value;
}

/**
 * Load the video link from those RTF files. The last text starting with
 * "http" wins; the record becomes type "video" and its `images` list is
 * replaced by a single video entry pointing at that link.
 *
 * @param {string} path - RTF file to read.
 * @param {object} record - Page record to update.
 * @returns {Promise<void>}
 */
async function loadLink(path, record) {
  const doc = await parseRTFFile(path);
  let uri;
  for (const para of doc.content) {
    // The link can sit in a span inside a paragraph or in a bare span.
    para.content?.forEach((clip) => {
      if (clip.value.match(/^http/)) {
        uri = clip.value.trim();
      }
    });
    if (para.value?.match(/^http/)) {
      uri = para.value.trim();
    }
  }
  console.log(uri);
  record.type = "video";
  record.images = [
    {
      type: "video",
      caption: "",
      uri,
    },
  ];
}

/**
 * Load everything and then exit! A failure is reported and turned into a
 * non-zero exit code instead of being left as an unhandled rejection.
 */
main()
  .then(() => process.exit(0))
  .catch((err) => {
    console.error(err);
    process.exit(1);
  });