diff options
| author | Jules Laplace <julescarbon@gmail.com> | 2021-08-23 21:33:42 +0200 |
|---|---|---|
| committer | Jules Laplace <julescarbon@gmail.com> | 2021-08-23 21:33:42 +0200 |
| commit | a05d52a7b13607181ce0443b17769bb02532dfc1 (patch) | |
| tree | f5c4722363f29a610d1a4f95efbd9351999083f6 /load_spreadsheet.js | |
| parent | 5ddde1cbb70bf4bc2df127fced5afb966069d299 (diff) | |
importing rtf
Diffstat (limited to 'load_spreadsheet.js')
| -rw-r--r-- | load_spreadsheet.js | 193 |
1 files changed, 172 insertions, 21 deletions
diff --git a/load_spreadsheet.js b/load_spreadsheet.js index 347c6eb..3bada97 100644 --- a/load_spreadsheet.js +++ b/load_spreadsheet.js @@ -3,30 +3,24 @@ */ import { loadJSON, loadCSV, writeJSON } from "./file_utils.js"; +import { readdir } from "fs/promises"; +import parseRTF from "rtf-parser"; +import fs from "fs"; +import sizeOf from "image-size"; const datasheetFile = "./data_store/tags.csv"; const dbFile = "./db.json"; -var tagTypes = [ - "No6092", - "1620s", - "painting", - "blunt", - "National Gallery of Canada", - "AGO", - "courtauld", - "intervensions", - "connosieurship", - "double agent", - "forensics", - "black box", -]; +var tagTypes = "No6092,1620s,painting,blunt,National Gallery of Canada,AGO,courtauld,intervensions,connsoeurship,double agent,forensics,black box,Stankievech".split( + "," +); async function main() { // basically this script exists to assign the X'd fields from the spreadsheet // to the okcms json :) const data = await loadCSV(datasheetFile); const db = await loadJSON(dbFile); + const dataStore = await loadDataStoreIndex("./data_store"); db.page = db.page || []; db.ui = db.ui || []; @@ -39,30 +33,187 @@ async function main() { }, {}); // loop over the CSV data :) - data.forEach((row, index) => { - const cell = pageById[index] || { + let index = -1; + for (let row of data) { + index += 1; + const record = pageById[index] || { __index: index, - id: "post_" + index, + id: "page_" + index, title: row.Title, }; // loop over the tags... let tagIndex = 0; tagTypes.forEach((type, tagId) => { if (row[type] === "x") { - cell["tag_" + tagIndex] = tagId + 1; + record["tag_" + tagIndex] = tagId + 1; tagIndex += 1; } }); // make sure all other tags are cleared out for (; tagIndex < 9; tagIndex++) { - cell["tag_" + tagIndex] = 0; + record["tag_" + tagIndex] = 0; } + // if there is a corresponding record in the data store, accumulate it + if (String(index + 1) in dataStore) { + await loadFiles(dataStore[index + 1], record); + } + // if we haven't seen this ID before, append it if (!pageById[index]) { - db.page.push(cell); + db.page.push(record); } - }); + } await writeJSON(dbFile, db); } +async function loadDataStoreIndex(path) { + const files = await readdir(path); + let parts, index; + let folders = {}; + for (const file of files) { + if (file.match(".csv")) continue; + if (file.match(".DS_Store")) continue; + parts = file.split("-"); + index = parts[0].trim().replace(/^0/, ""); + folders[index] = file; + } + return folders; +} + +async function loadFiles(folder, record) { + const path = `./data_store/${folder}/`; + const files = await readdir(path); + const images = (record.images = []); + let dimensions; + for (const file of files) { + if (file.match(".DS_Store")) continue; + if (file.match(/-URL.rtf/i)) { + await loadLink(path + file, record); + } else if (file.match(/.rtf/i)) { + await loadText(path + file, record); + } else if (file.match(/.txt/i)) { + console.error("+ fix text file", path + file); + } else if (file.match(/-thumb/i)) { + dimensions = sizeOf(path + file); + record.thumbnail = { + uri: `assets/data_store/${folder}/${file}`, + caption: "", + ...dimensions, + }; + } else { + dimensions = sizeOf(path + file); + images.push({ + uri: `assets/data_store/${folder}/${file}`, + caption: "", + ...dimensions, + }); + } + } +} + +async function loadText(path, record) { + return new Promise((resolve, reject) => { + parseRTF.stream(fs.createReadStream(path), (err, doc) => { + const paragraphs = doc.content.filter((para) => para.content); + const finalParagraph = doc.content.filter((para) => !para.content); + record.citation = ""; + record.description = ""; + let groupCount = 0; + let content = ""; + record.author = ""; + record.title = ""; + paragraphs.forEach((para, paragraphIndex) => { + const paragraph = []; + para.content.forEach((clip) => { + switch (paragraphIndex) { + case 0: // number + // console.log(clip.value); + return; + case 1: // author + // console.log(clip.value); + record.author += getClipValue(clip); + return; + case 2: // title + // console.log(clip.value); + record.title += getClipValue(clip); + return; + default: + appendClip(paragraph, clip); + } + }); + if (paragraph.length) { + if (groupCount < 3) { + record.citation += paragraph.join("") + "<br>\n"; + } else { + content += "<p>\n" + paragraph.join("") + "\n</p>\n\n"; + } + } + if (!para.content.length) { + groupCount += 1; + } + }); + const finalParagraphExtract = []; + finalParagraph.forEach((clip) => { + appendClip(finalParagraphExtract, clip); + }); + if (finalParagraphExtract.length) { + content += "<p>\n" + finalParagraphExtract.join("") + "\n</p>\n\n"; + } + + record.description = content; + resolve(); + }); + }); +} + +function appendClip(paragraph, clip) { + paragraph.push(getClipValue(clip)); +} +function getClipValue(clip) { + if (clip.style.italic) { + return "<i>" + clip.value + "</i>"; + } else if (clip.style.underline) { + return "<u>" + clip.value + "</u>"; + } else { + return clip.value; + } +} + +async function loadLink(path, record) { + return new Promise((resolve, reject) => { + parseRTF.stream(fs.createReadStream(path), (err, doc) => { + const paragraphs = doc.content; + let uri; + paragraphs.forEach((para, paragraphIndex) => { + const paragraph = []; + para.content?.forEach((clip) => { + if (clip.value.match(/^http/)) { + uri = clip.value.trim(); + } + }); + if (para.value?.match(/^http/)) { + uri = para.value.trim(); + } + }); + let match = uri.match(/\d+/); + let token = ""; + if (match) { + token = match[0]; + } else { + console.error("No token:", uri); + } + record.type = "video"; + record.images = [ + { + type: "video", + caption: "", + uri, + token, + }, + ]; + resolve(); + }); + }); +} + main().then(() => process.exit(0)); |
