animism-align/cli/commands/site/export.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361

import click

from app.settings import app_cfg
from app.utils.file_utils import load_text, write_json, write_text
from os.path import join
from functools import reduce
from shutil import copyfile
import os

@click.command('info')
# @click.option('-g', '--graph', 'opt_graph_path', required=True,
#   help='Graph name')
@click.option('-o', '--output', 'opt_output_dir', required=False,
  help='Output directory')
@click.pass_context
def cli(ctx, opt_output_dir):
  """Export a graph"""
  # The docstring above is left untouched because click shows it as the
  # command's help text.
  #
  # What this command does:
  #   1. dumps the database (export_db), prunes it (prune_db) and rewrites
  #      upload references to the exported media URL (rewrite_db_media)
  #   2. fills in static/site.html and writes it as index.html
  #   3. writes the database next to it as index.json
  #   4. copies the referenced uploads and the static fonts/img folders
  #
  # ctx is the click context (not used here). opt_output_dir is the folder
  # name created under app_cfg.DIR_EXPORTS; when omitted, a timestamped
  # "animism_YYYYmmddHHMM" name is used instead.

  # ------------------------------------------------
  # imports

  import datetime
  import posixpath

  try:
    from distutils.dir_util import copy_tree
  except ImportError:
    # distutils is no longer part of the standard library on Python 3.12+;
    # shutil.copytree(dirs_exist_ok=True) merges into an existing folder too.
    from shutil import copytree

    def copy_tree(src, dst):
      return copytree(src, dst, dirs_exist_ok=True)

  # ------------------------------------------------
  # export settings

  page_title = "Animism: Episode 1"
  page_name = "episode1"
  page_desc = "A Report on Migrating Souls in Museums and Moving Pictures"

  page_url = "/" + page_name
  media_url = "/" + page_name + "/media"

  site_title = f"{page_title}: {page_desc}"
  site_path = opt_output_dir or datetime.datetime.now().strftime("animism_%Y%m%d%H%M")
  site_fp_static = join(app_cfg.DIR_EXPORTS, site_path, 'static')
  site_fp_out = join(app_cfg.DIR_EXPORTS, site_path, page_name)
  site_fp_media = join(app_cfg.DIR_EXPORTS, site_path, page_name, 'media')

  # ------------------------------------------------
  # load the db

  db = export_db()
  prune_db(db)
  media_to_copy = rewrite_db_media(db, site_fp_media, media_url)

  # ------------------------------------------------
  # build the index.html

  index_html = load_text(join(app_cfg.DIR_STATIC, 'site.html'), split=False)
  index_html = index_html.replace('SITE_PATH', page_url)
  index_html = index_html.replace('PAGE_TITLE', page_title)
  index_html = index_html.replace('PAGE_DESCRIPTION', page_desc)
  index_html = index_html.replace('PLAIN_CONTENT', plain_content(db, site_title))
  # this is a URL, not a filesystem path: always join it with forward slashes
  index_html = index_html.replace('BUNDLE_PATH', posixpath.join(page_url, 'bundle.js'))
  write_text(index_html, join(site_fp_out, 'index.html'))

  # ------------------------------------------------
  # build the index.json

  write_json(db, join(site_fp_out, 'index.json'), default=str, minify=False)

  # ------------------------------------------------
  # write custom css

  # site_css = load_text(join(app_cfg.DIR_STATIC, 'site.css'), split=False)
  # index_html = index_html.replace('SITE_PATH', page_url)
  # write_text(site_css, join(site_fp_out, 'site.css'))

  # ------------------------------------------------
  # copy media from the exhibition

  copy_media(site_fp_media, media_to_copy)

  # ------------------------------------------------
  # copy any static assets

  copy_tree(join(app_cfg.DIR_STATIC, 'fonts'), join(site_fp_static, 'fonts'))
  copy_tree(join(app_cfg.DIR_STATIC, 'img'), join(site_fp_static, 'img'))

  # ------------------------------------------------
  # build javascript

  # print("Building javascript...")
  # print(f'NODE_ENV=production node ./node_modules/webpack-cli/bin/cli.js --config ./webpack.config.site.js -o {graph_dir}/bundle.js')
  # os.chdir(app_cfg.DIR_PROJECT_ROOT)
  # os.system(f'NODE_ENV=production node ./node_modules/webpack-cli/bin/cli.js --config ./webpack.config.site.js -o {graph_dir}/bundle.js')

  print("Site export complete!")
  print(f"Site exported to: {site_fp_out}")

######################################################################
# Database Functions
######################################################################

def copy_media(fp_media, to_copy):
  """Copy every queued upload to its export destination.

  fp_media is the media output directory (created if missing).
  to_copy maps a key to a dict holding a 'src' and a 'dst' file path.
  Prints the number of files and the total size written, in MB.
  """
  os.makedirs(fp_media, exist_ok=True)
  print(f"copying {len(to_copy)} uploaded files")
  bytes_written = 0
  for entry in to_copy.values():
    src_path, dst_path = entry['src'], entry['dst']
    copyfile(src_path, dst_path)
    # measure the copy itself so the total reflects what landed on disk
    bytes_written += os.path.getsize(dst_path)
  megabytes = round(bytes_written / 1000000, 2)
  print(f"wrote {megabytes} MB")

def rewrite_db_media(db, fp_out, url_out):
  """
  Walk the media table and rewrite every upload reference in place.

  Each upload found in a media item's settings is passed through
  rewrite_upload, which swaps it for an export-URL object and records the
  file to copy. Returns the dict of files to copy (see rewrite_upload).
  """
  pending = {}

  def relocate(upload, **opts):
    # every rewrite shares the same copy queue and output locations
    return rewrite_upload(pending, upload, fp_out, url_out, **opts)

  for media in IterateTable(db['media']):
    settings = media['settings']
    kind = media['type']

    if kind == 'image':
      # various sizes... fullsize is not used anywhere, so drop it
      settings.pop('fullsize', None)
      for key in app_cfg.IMAGE_UPLOAD_FIELDS:
        if key in settings:
          settings[key] = relocate(settings[key])

    elif kind == 'video':
      # poster image
      if 'poster' in settings:
        settings['poster'] = relocate(settings['poster'])

    elif kind == 'gallery':
      # image_lookup (fullsize) is rebuilt from image_order; with png_only
      # set, rewrite_upload yields None for uploads that are not PNGs
      png_lookup = {}
      for image_id in map(str, settings['image_order']):
        if image_id in settings['image_lookup']:
          png_lookup[image_id] = relocate(settings['image_lookup'][image_id], png_only=True)
      settings['image_lookup'] = png_lookup

      # the remaining per-image lookups are rewritten entry by entry
      for key in app_cfg.IMAGE_UPLOAD_GALLERY_LOOKUPS:
        for image_id in map(str, settings['image_order']):
          bucket = settings[key]
          if image_id in bucket:
            bucket[image_id] = relocate(bucket[image_id])

    elif kind == 'file':
      # singleton file upload
      if 'file' in settings:
        settings['file'] = relocate(settings['file'])

  return pending

def rewrite_upload(to_copy, item, fp_out, url_out, png_only=False):
  """
  Rewrite one upload record to the export URL schema and queue its file.

  Upload records look like this (an "ext" key is also read below):
    "fn": "koester.gif",
    "sha256": "c7c25e8d9be8b3e5db89df0f4a35f8a599dfdcf8bf9bc1f6c4137c7b6522d710",
    "tag": "file",
    "url": "/static/data_store/uploads/file/koester.gif",
    "username": "animism"

  to_copy  -- dict keyed by sha256, filled with {"src", "dst"} paths (mutated)
  item     -- the upload record; returned unchanged if it has no "sha256"
  fp_out   -- directory the file will be copied into
  url_out  -- URL prefix the exported file will be served from
  png_only -- when true, any upload whose "ext" is not ".png" yields None

  Returns {"url": <url_out>/<sha256><ext>}, or None / the original item as
  described above. A missing source file is reported with a print, but the
  rewritten URL is still returned.
  """
  import posixpath

  if 'sha256' not in item:
    return item
  if png_only and item['ext'] != '.png':
    return None
  sha = item['sha256']
  out_fn = sha + item['ext']

  # files are named by hash, so each distinct upload is queued only once
  if sha not in to_copy:
    in_path = join(app_cfg.DIR_UPLOADS, item['tag'], item['fn'])
    if os.path.exists(in_path):
      to_copy[sha] = {
        "src": in_path,
        "dst": join(fp_out, out_fn)
      }
    else:
      print(f"Missing path: {in_path}")

  # URLs always use forward slashes, whatever the host OS path separator is
  return {
    "url": posixpath.join(url_out, out_fn),
  }

def prune_db(db):
  """Drop rows that no annotation refers to (modifies db in place).

  - paragraphs whose id is not some annotation's paragraph_id
  - media whose id is not some annotation's settings['media_id']
  """
  kept_paragraphs = set()
  kept_media = set()
  for annotation in IterateTable(db['annotation']):
    kept_paragraphs.add(annotation['paragraph_id'])
    settings = annotation['settings']
    if 'media_id' in settings:
      kept_media.add(settings['media_id'])

  for table, kept in (('paragraph', kept_paragraphs), ('media', kept_media)):
    db[table] = filter_db(db, table, kept)

def filter_db(db, table, seen):
  """Return a new order/lookup table holding only the ids found in seen.

  The original ordering of db[table]['order'] is preserved.
  """
  source = db[table]
  kept_ids = [row_id for row_id in source['order'] if row_id in seen]
  kept_rows = {}
  for row_id in kept_ids:
    kept_rows[row_id] = source['lookup'][row_id]
  return { 'order': kept_ids, 'lookup': kept_rows }

def export_db():
  """Load the entire database and convert it to JSON

  Queries the Episode, Venue, Annotation, Paragraph and Media models and
  returns a dict keyed by table name. Each value has the shape
  { 'order': [ids in query order], 'lookup': { id: sanitized toJSON() } }.
  Annotations and paragraphs are ordered by start_ts.
  """
  from app.sql.common import Session, Episode, Venue, Annotation, Paragraph, Media

  classes = [ Episode, Venue, Annotation, Paragraph, Media ]
  data = {}

  session = Session()
  try:
    for c in classes:
      e_q = session.query(c)
      if c == Annotation or c == Paragraph:
        e_q = e_q.order_by(c.start_ts)
      e_list = e_q.all()
      order = list(map(get_id, e_list))
      lookup = reduce(get_json_tup, e_list, {})
      table_name = str(c.__table__)
      data[table_name] = { 'order': order, 'lookup': lookup }
      print(f"""exported {table_name} ({len(order)} rows)""")
  finally:
    # release the session even if a query or toJSON() call fails;
    # rows were already converted with toJSON() above, before the close
    session.close()
  return data

def sanitize_obj(data):
  """Remove the created_at / updated_at keys from data in place and return it."""
  for stamp in ('created_at', 'updated_at'):
    if stamp in data:
      del data[stamp]
  return data

def get_id(e):
  """Return the id attribute of e."""
  return e.id
def get_json_tup(a,e):
  """Reducer step: store e's sanitized toJSON() in accumulator a under e.id, return a."""
  record = sanitize_obj(e.toJSON())
  a[e.id] = record
  return a

def db_get(db, table, idx):
  """Return the row at position idx of a table's 'order' list."""
  rows = db[table]
  row_id = rows['order'][idx]
  return rows['lookup'][row_id]

######################################################################
# HTML Content Functions
######################################################################

def plain_content(db, title):
  """Build the plain HTML body: title, transcript, credits, episodes, venues."""
  sections = [
    h(1, title),
    transcript_to_html(db),
    credits_to_html(db, 1),
    table_to_html(db, 'episode', 'Episodes', episode_to_html),
    table_to_html(db, 'venue', 'Venues', venue_to_html),
  ]
  return "".join(sections)

def transcript_to_html(db):
  """Render the text annotations as a plain HTML transcript.

  Annotations are grouped into <p> blocks by paragraph_id, section headings
  become numbered <h3> elements, and footnotes are replaced by <sup>
  markers whose text is listed under a final "Footnotes" heading.
  """
  chunks = [h(2, "Transcript")]
  sentence_buf = ""
  current_pid = 0
  heading_idx = 0
  footnotes = []

  for ann in IterateTable(db['annotation']):
    kind = ann['type']
    # skip media annotations (for now..)
    if kind not in app_cfg.TEXT_ANNOTATION_TYPES:
      continue

    is_heading = kind == 'section_heading'

    # a heading or a change of paragraph id flushes the pending paragraph
    if is_heading or ann['paragraph_id'] != current_pid:
      if sentence_buf:
        chunks.append(p(sentence_buf))
      sentence_buf = ""
      current_pid = ann['paragraph_id']

    if is_heading:
      chunks.append(h(3, f"{app_cfg.ROMAN_NUMERALS[heading_idx]}: {ann['text']}"))
      heading_idx += 1
    elif kind == 'footnote':
      # the marker is the 1-based position of the note in the footnote list
      footnotes.append(ann['text'])
      sentence_buf += f"<sup>{len(footnotes)}</sup> "
    else:
      sentence_buf += ann['text'] + " "

  # flush whatever paragraph is still open
  if sentence_buf:
    chunks.append(p(sentence_buf))

  if footnotes:
    chunks.append(h(3, "Footnotes"))
    for number, note in enumerate(footnotes, start=1):
      chunks.append(p(f"<sup>{number}</sup> " + note))

  return "".join(chunks)

def credits_to_html(db, ep_num):
  """Render the credits of episode number ep_num (1-based position in the episode table)."""
  episode = db_get(db, 'episode', ep_num - 1)
  return h(2, "Credits") + pbr_to_paras(episode['settings']['credits'])

def episode_to_html(e):
  """Render an upcoming episode as plain HTML"""
  title = e['title']
  number = e['episode_number']
  # only append the title when there is one
  heading = f"Episode {number}: {title}" if len(title) else f"Episode {number}"
  parts = [
    h(3, heading),
    p(e['release_date']),
    h(4, "Artists"),
    pbr(e['settings']['artists']),
  ]
  return "".join(parts)

def venue_to_html(e):
  """Render a venue as plain HTML"""
  parts = [
    h(3, e['title']),
    p(e['date']),
    h(4, "Artists"),
    pbr(e['settings']['artists']),
    pbr_to_paras(e['settings']['credits']),
  ]
  return "".join(parts)


######################################################################
# HTML Helper Functions
######################################################################

def table_to_html(db, table, title, fn):
  """Convert a simple table list to HTML

  Emits an <h2> title followed by one <div> per row, each rendered by fn,
  all wrapped in an outer <div>.
  """
  pieces = [h(2, title)]
  pieces.extend(d(fn(row)) for row in IterateTable(db[table]))
  return d("".join(pieces))

# Helper functions that wrap stuff in HTML
def pbr_to_paras(s):
  """Split s into paragraphs (see to_paras) and render each one with pbr."""
  return "".join(pbr(para) for para in to_paras(s))
def to_paras(s):
  """Remove every "# " from s, then split it on blank lines."""
  without_markers = s.replace("# ", "")
  return without_markers.split("\n\n")
def d(s):
  """Wrap s in a <div> element."""
  return "<div>{}</div>".format(s)
def h(n, s):
  """Wrap s in a level-n heading element (<hn>)."""
  return "<h{0}>{1}</h{0}>".format(n, s)
def br(s):
  """Turn every newline in s into a <br> tag."""
  return "<br>".join(s.split("\n"))
def p(s):
  """Wrap s in a <p> element."""
  return "<p>{}</p>".format(s)
def pbr(s):
  """Render s as one <p> element, with its newlines turned into <br> tags."""
  with_breaks = br(s)
  return p(with_breaks)

def write_refresh(url, site_fp_out):
  """Write an index.html into site_fp_out holding a meta-refresh redirect to url."""
  # the redirect target is the url argument (the body previously used an
  # undefined name here, which raised NameError on every call)
  write_text(f'<meta http-equiv="refresh" content="0; url={url}">', join(site_fp_out, 'index.html'))

######################################################################
# DB Iterator Helper
######################################################################

class IterateTable:
  """Single-pass iterator over an order/lookup table.

  Yields table['lookup'][id] for each id in table['order'], in order.
  The length of 'order' is captured once, at construction time.
  """
  def __init__(self, table):
    self.table = table
    self.len = len(table['order'])
    # starts one before the first row; __next__ advances before reading
    self.index = -1

  def __iter__(self):
    return self

  def __next__(self):
    position = self.index + 1
    self.index = position
    if position >= self.len:
      raise StopIteration
    row_id = self.table['order'][position]
    return self.table['lookup'][row_id]