/**
* Load the No.6092 datasheet into the OKCMS JSON
*/
import { loadJSON, loadCSV, writeJSON } from "./file_utils.js";
import { readdir } from "fs/promises";
import parseRTF from "rtf-parser";
import fs from "fs";
import sizeOf from "image-size";
/** CSV datasheet whose rows are merged into the database. */
const datasheetFile = "./data_store/tags.csv";

/** JSON database that this script reads, updates and writes back. */
const dbFile = "./db.json";

/**
 * Tag column headers of the datasheet.
 *
 * Each entry is used verbatim as a key into a CSV row, so the spellings must
 * stay exactly as they are. The tag id stored on a record is the entry's
 * 1-based position in this list, so the order must not change either.
 */
const tagTypes = [
  "No6092",
  "1620s",
  "painting",
  "blunt",
  "National Gallery of Canada",
  "AGO",
  "courtauld",
  "intervensions",
  "connsoeurship",
  "double agent",
  "forensics",
  "black box",
  "Stankievech",
];
/**
 * Merge the tag datasheet and the data store folders into the database file.
 *
 * CSV row N (0-based) maps to the page whose id is `page_N`. An existing page
 * is updated in place; otherwise a new image page is created and appended.
 * For every row the tag columns marked "x" are written to consecutive
 * `tag_0`, `tag_1`, ... slots as 1-based tag ids, and the data store folder
 * indexed under N + 1 (if any) is loaded into the record.
 *
 * @returns {Promise<void>} settles once the database file has been written
 */
async function main() {
  // tag_0 ... tag_8 are always present on a record, zero-filled when unused.
  const guaranteedTagSlots = 9;

  const data = await loadCSV(datasheetFile);
  const db = await loadJSON(dbFile);
  const dataStore = await loadDataStoreIndex("./data_store");
  db.page = db.page || [];
  db.ui = db.ui || [];

  // Index existing pages by the number in their "page_<n>" id so a re-run
  // merges into them instead of duplicating them.
  const pageById = new Map(
    db.page.map((page) => [Number.parseInt(page.id.split("_")[1], 10), page])
  );

  let index = -1;
  for (const row of data) {
    index += 1;
    const existing = pageById.get(index);
    const record = existing || {
      __index: index,
      id: `page_${index}`,
      title: row.Title,
      type: "image",
    };

    // Pack the ticked tags into consecutive slots.
    let tagIndex = 0;
    tagTypes.forEach((type, tagId) => {
      if (row[type] === "x") {
        record[`tag_${tagIndex}`] = tagId + 1;
        tagIndex += 1;
      }
    });

    // Zero the remaining guaranteed slots.
    for (; tagIndex < guaranteedTagSlots; tagIndex++) {
      record[`tag_${tagIndex}`] = 0;
    }
    // There can be more tag types than guaranteed slots. Higher slots exist
    // only on records that once carried that many tags; zero those too so a
    // re-merge never leaves a stale tag behind, without adding new fields to
    // records that never had them.
    for (; tagIndex < tagTypes.length; tagIndex++) {
      const slot = `tag_${tagIndex}`;
      if (slot in record) {
        record[slot] = 0;
      }
    }

    // Data store folders are indexed by 1-based number.
    if (String(index + 1) in dataStore) {
      await loadFiles(dataStore[index + 1], record);
    }

    if (!existing) {
      db.page.push(record);
    }
  }

  await writeJSON(dbFile, db);
}
/**
 * Index the entries of the data store directory by their leading number.
 *
 * An entry name is expected to look like "<number>-<anything>"; the text
 * before the first "-" is trimmed, stripped of leading zeros and used as the
 * key, so "001 - foo", "01-foo" and "1 - foo" all index under "1".
 * CSV files and the macOS ".DS_Store" file are ignored.
 *
 * @param {string} path directory to list
 * @returns {Promise<Object<string, string>>} entry name keyed by its number
 */
async function loadDataStoreIndex(path) {
  const entries = await readdir(path);
  const folders = {};
  for (const entry of entries) {
    // Compare literally: String.prototype.match() would turn ".csv" into a
    // regex whose dot matches any character, anywhere in the name.
    if (entry.endsWith(".csv") || entry === ".DS_Store") {
      continue;
    }
    const [prefix] = entry.split("-");
    // Drop every leading zero but keep the last character, so "0" stays "0".
    const index = prefix.trim().replace(/^0+(?=.)/, "");
    folders[index] = entry;
  }
  return folders;
}
/**
 * Load the contents of one data store folder into a record.
 *
 * Files are dispatched on their name:
 *  - "*-URL.rtf"  is handed to loadLink
 *  - "*.rtf"      is handed to loadText
 *  - "*.txt"      is reported on stderr and skipped
 *  - "*-thumb*"   becomes record.thumbnail
 *  - anything else is measured as an image and collected in record.images
 *
 * @param {string} folder folder name inside ./data_store
 * @param {object} record page record to fill in (mutated)
 * @returns {Promise<void>}
 */
async function loadFiles(folder, record) {
  const path = `./data_store/${folder}/`;
  const files = await readdir(path);

  // record.images is reset on every run. The local reference is kept on
  // purpose: loadLink replaces record.images with its own array, and images
  // collected here must keep going to this one, as they always have.
  const images = (record.images = []);

  // Shared shape of thumbnail and image entries: uri, empty caption, then
  // whatever image-size reports for the file.
  const describeImage = (file) => ({
    uri: `assets/data_store/${folder}/${file}`,
    caption: "",
    ...sizeOf(path + file),
  });

  for (const file of files) {
    if (file === ".DS_Store") {
      continue;
    }
    // Dots are escaped and extensions anchored so that a name which merely
    // contains "rtf" or "txt" (e.g. "courtfield.jpg") is still an image.
    if (/-URL\.rtf$/i.test(file)) {
      await loadLink(path + file, record);
    } else if (/\.rtf$/i.test(file)) {
      await loadText(path + file, record);
    } else if (/\.txt$/i.test(file)) {
      console.error("+ fix text file", path + file);
    } else if (/-thumb/i.test(file)) {
      record.thumbnail = describeImage(file);
    } else {
      images.push(describeImage(file));
    }
  }
}
// NOTE(review): the marker strings below are copied character-for-character
// from this script as it currently stands; a raw line break inside a quoted
// string (which does not parse) is written as "\n". The italic/underline
// wrappers are empty and the paragraph markers are bare line breaks, which
// looks like markup (presumably HTML tags) that was stripped at some point.
// Confirm against the project history and restore the intended markers here.
const ITALIC_OPEN = "";
const ITALIC_CLOSE = "";
const UNDERLINE_OPEN = "";
const UNDERLINE_CLOSE = "";
const CITATION_BREAK = "\n\n";
const PARAGRAPH_OPEN = "\n\n";
const PARAGRAPH_CLOSE = "\n\n\n\n";
const FINAL_PARAGRAPH_OPEN = "\n";
const FINAL_PARAGRAPH_CLOSE = "\n\n\n\n";

/**
 * Parse an RTF file with rtf-parser.
 *
 * @param {string} path file to read
 * @returns {Promise<object>} the parsed document; rejects with the error the
 *   parser hands to its callback
 */
function parseRtfFile(path) {
  return new Promise((resolve, reject) => {
    parseRTF.stream(fs.createReadStream(path), (err, doc) => {
      if (err) {
        reject(err);
      } else {
        resolve(doc);
      }
    });
  });
}

/**
 * Fill a record's author, title, citation and description from an RTF file.
 *
 * Only document entries that have a `content` list count as paragraphs:
 * paragraph 0 (the record number) is ignored, paragraph 1 is the author and
 * paragraph 2 the title. Later paragraphs go to the citation until three
 * empty paragraphs have been seen, and to the description after that.
 * Entries without a `content` list are appended to the description as one
 * final paragraph.
 *
 * @param {string} path RTF file to read
 * @param {object} record page record to fill in (mutated)
 * @returns {Promise<void>} rejects if the file cannot be parsed
 */
async function loadText(path, record) {
  const doc = await parseRtfFile(path);
  const paragraphs = doc.content.filter((para) => para.content);
  const trailingSpans = doc.content.filter((para) => !para.content);

  record.citation = "";
  record.description = "";
  record.author = "";
  record.title = "";

  let groupCount = 0; // number of empty paragraphs seen so far
  let content = "";

  paragraphs.forEach((para, paragraphIndex) => {
    const paragraph = [];
    for (const clip of para.content) {
      if (paragraphIndex === 1) {
        record.author += getClipValue(clip);
      } else if (paragraphIndex === 2) {
        record.title += getClipValue(clip);
      } else if (paragraphIndex > 2) {
        appendClip(paragraph, clip);
      }
      // paragraphIndex 0 holds the record number, which is not stored
    }

    if (paragraph.length) {
      if (groupCount < 3) {
        record.citation += paragraph.join("") + CITATION_BREAK;
      } else {
        content += PARAGRAPH_OPEN + paragraph.join("") + PARAGRAPH_CLOSE;
      }
    }
    if (!para.content.length) {
      groupCount += 1;
    }
  });

  const finalParagraphExtract = [];
  for (const clip of trailingSpans) {
    appendClip(finalParagraphExtract, clip);
  }
  if (finalParagraphExtract.length) {
    content +=
      FINAL_PARAGRAPH_OPEN +
      finalParagraphExtract.join("") +
      FINAL_PARAGRAPH_CLOSE;
  }

  record.description = content;
}

/**
 * Append a clip's rendered text to a paragraph buffer.
 *
 * @param {Array} paragraph buffer to push onto (mutated)
 * @param {object} clip text run from the parsed document
 */
function appendClip(paragraph, clip) {
  paragraph.push(getClipValue(clip));
}

/**
 * Render one text run, wrapping italic or underlined text in its markers.
 * A run without a `style` object is treated as plain text.
 *
 * @param {object} clip text run with `value` and, optionally, `style`
 * @returns {*} the wrapped text as a string, or `clip.value` as-is when plain
 */
function getClipValue(clip) {
  if (clip.style?.italic) {
    return ITALIC_OPEN + clip.value + ITALIC_CLOSE;
  }
  if (clip.style?.underline) {
    return UNDERLINE_OPEN + clip.value + UNDERLINE_CLOSE;
  }
  return clip.value;
}

/**
 * Turn a record into a video record from an RTF file that contains a URL.
 *
 * The URL is the last text run (or top-level entry) whose value starts with
 * "http". The first run of digits in the URL is stored as the video token;
 * a URL without digits is reported on stderr and gets an empty token.
 *
 * @param {string} path RTF file to read
 * @param {object} record page record to update (mutated)
 * @returns {Promise<void>} rejects if the file cannot be parsed or holds no URL
 */
async function loadLink(path, record) {
  const doc = await parseRtfFile(path);
  const isUrl = (value) => typeof value === "string" && value.startsWith("http");

  let uri;
  for (const para of doc.content) {
    para.content?.forEach((clip) => {
      if (isUrl(clip.value)) {
        uri = clip.value.trim();
      }
    });
    if (isUrl(para.value)) {
      uri = para.value.trim();
    }
  }
  if (uri === undefined) {
    throw new Error(`No URL found in ${path}`);
  }

  const match = uri.match(/\d+/);
  let token = "";
  if (match) {
    token = match[0];
  } else {
    console.error("No token:", uri);
  }

  record.type = "video";
  record.images = [
    {
      type: "video",
      caption: "",
      uri,
      token,
    },
  ];
}

main().then(
  () => process.exit(0),
  (err) => {
    // Report the failure and exit non-zero instead of leaving the rejection unhandled.
    console.error(err);
    process.exit(1);
  }
);