build_app.py 6.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220
  1. #!/usr/bin/env python3
  2. """Build the embeddable dashboard artifact under results/<C>/.
Wipes and recreates results/<C>/, copies templates + vendor + per-member
data, stamps the manifest version and an inlined copy of the manifest into
the HTML pages, writes a README with embedding instructions, and copies
Methodology.md into the artifact when that file is present.
  6. Usage: python3 build_app.py --congress 119
  7. """
  8. import argparse
  9. import json
  10. import shutil
  11. import sys
  12. from datetime import datetime, timezone
  13. from pathlib import Path
  14. ROOT = Path(__file__).resolve().parent
  15. TEMPLATE_FILES = [
  16. "app.html",
  17. "compare.html",
  18. "ranking.html",
  19. "app.js",
  20. "compare.js",
  21. "ranking.js",
  22. "app.css",
  23. ]
  24. VENDOR_FILES = [
  25. "vendor/chart.umd.min.js",
  26. "vendor/sortable.min.js",
  27. ]
  28. def dir_size_mb(path: Path) -> float:
  29. total = 0
  30. for p in path.rglob("*"):
  31. if p.is_file():
  32. total += p.stat().st_size
  33. return total / (1024 * 1024)
  34. def stamp_manifest_version(html_path: Path, version: str, generated_at: str, manifest: dict) -> None:
  35. text = html_path.read_text(encoding="utf-8")
  36. ver_tag = (
  37. '<script type="application/json" id="polisci-manifest-version">'
  38. + json.dumps({"version": version, "generated_at": generated_at}, separators=(",", ":"))
  39. + "</script>"
  40. )
  41. # Inline the full manifest so the picker loads under file:// (per-member JSON
  42. # still requires HTTP — but the app shell is interactive without a server).
  43. manifest_tag = (
  44. '<script type="application/json" id="polisci-manifest">'
  45. + json.dumps(manifest, separators=(",", ":"))
  46. + "</script>"
  47. )
  48. if "</head>" not in text:
  49. raise SystemExit(f"build_app: no </head> in {html_path}")
  50. text = text.replace("</head>", ver_tag + "\n" + manifest_tag + "\n</head>", 1)
  51. html_path.write_text(text, encoding="utf-8")
  52. README_TEMPLATE = """# {congress_ord} Congress Voting Dashboard — Embeddable Artifact
  53. This directory is a self-contained dashboard for the {congress_ord} Congress.
  54. No external network requests at runtime; all data, charts, and vendor
  55. scripts ship in this directory.
  56. ## Files
  57. - `app.html` — single-member dashboard
  58. - `compare.html` — multi-member comparison view
  59. - `app.js`, `compare.js`, `app.css` — application code
  60. - `vendor/chart.umd.min.js` — Chart.js 4.4.0
  61. - `vendor/sortable.min.js` — SortableJS 1.15.2
  62. - `data/manifest.json` — member index (~{member_count} entries)
  63. - `data/members/<id>.json` — per-member metrics (~80 KB each)
  64. ## Embed modes
  65. ### 1. Standalone
  66. Open `app.html` (or `compare.html`) directly.
  67. ### 2. Iframe
  68. ```html
  69. <iframe
  70. src="https://your.host/path/to/app.html"
  71. sandbox="allow-scripts allow-same-origin"
  72. referrerpolicy="no-referrer"
  73. style="width:100%;min-height:1200px;border:0"></iframe>
  74. ```
  75. ### 3. Inline (single host page)
  76. ```html
  77. <link rel="stylesheet" href="https://your.host/path/to/app.css">
  78. <div id="polisci-root" data-base="https://your.host/path/to/data/"></div>
  79. <script src="https://your.host/path/to/vendor/chart.umd.min.js" defer></script>
  80. <script src="https://your.host/path/to/vendor/sortable.min.js" defer></script>
  81. <script src="https://your.host/path/to/app.js" defer></script>
  82. ```
  83. All CSS is scoped under `#polisci-root` to avoid collisions with host styles.
  84. Override `data-base` to point at the data directory served from your host.
  85. ## Recommended Content Security Policy
  86. ```
  87. Content-Security-Policy: default-src 'self'; script-src 'self';
  88. style-src 'self'; img-src 'self' data:; connect-src 'self';
  89. frame-ancestors <your-domain>; base-uri 'none'; form-action 'none'
  90. ```
  91. ## Regenerating
  92. From the project root:
  93. ```
  94. python3 fetch.py --congress {congress}
  95. python3 parse.py --congress {congress}
  96. python3 enrich_roster.py --congress {congress}
  97. pytest tests/
  98. python3 build_members.py --congress {congress}
  99. python3 build_app.py --congress {congress}
  100. ```
  101. Or the all-in-one:
  102. ```
  103. python3 build_all.py --congress {congress}
  104. ```
  105. ## Provenance
  106. Each per-member JSON includes a `_meta` block with `schema_version`,
  107. `pipeline_version`, `classifier_hash` (SHA-256 of analyze.py),
  108. `data_snapshot_date`, and `source_xml_count`. See `DOCUMENTATION.md`
  109. in the source repository for full methodology.
  110. """
  111. def ordinal(n: int) -> str:
  112. if 11 <= (n % 100) <= 13:
  113. suf = "th"
  114. else:
  115. suf = {1: "st", 2: "nd", 3: "rd"}.get(n % 10, "th")
  116. return f"{n}{suf}"
  117. def main() -> int:
  118. ap = argparse.ArgumentParser(description="Build embeddable dashboard artifact.")
  119. ap.add_argument("--congress", type=int, default=119)
  120. args = ap.parse_args()
  121. congress = args.congress
  122. template_dir = ROOT / "template"
  123. data_dir = ROOT / "data" / str(congress)
  124. manifest_path = data_dir / "manifest.json"
  125. members_dir = data_dir / "members"
  126. out_dir = ROOT / "results" / str(congress)
  127. if not manifest_path.is_file():
  128. print(
  129. f"build_app: missing {manifest_path}; run build_members.py --congress {congress} first",
  130. file=sys.stderr,
  131. )
  132. return 2
  133. with manifest_path.open("r", encoding="utf-8") as f:
  134. manifest = json.load(f)
  135. version = manifest.get("version", "unknown")
  136. member_count = len(manifest.get("members", []))
  137. # Wipe + recreate
  138. if out_dir.exists():
  139. shutil.rmtree(out_dir)
  140. out_dir.mkdir(parents=True, exist_ok=True)
  141. print(f"build_app: cleaned and recreated {out_dir}/")
  142. # Copy template files
  143. for rel in TEMPLATE_FILES:
  144. src = template_dir / rel
  145. dst = out_dir / rel
  146. dst.parent.mkdir(parents=True, exist_ok=True)
  147. shutil.copy2(src, dst)
  148. # Copy vendor files
  149. for rel in VENDOR_FILES:
  150. src = template_dir / rel
  151. dst = out_dir / rel
  152. dst.parent.mkdir(parents=True, exist_ok=True)
  153. shutil.copy2(src, dst)
  154. # Stamp manifest version + full inline manifest into HTML heads
  155. generated_at = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
  156. for html_name in ("app.html", "compare.html", "ranking.html"):
  157. stamp_manifest_version(out_dir / html_name, version, generated_at, manifest)
  158. # Copy manifest + members data
  159. out_data = out_dir / "data"
  160. out_data.mkdir(parents=True, exist_ok=True)
  161. shutil.copy2(manifest_path, out_data / "manifest.json")
  162. shutil.copytree(members_dir, out_data / "members", dirs_exist_ok=True)
  163. # README
  164. readme = README_TEMPLATE.format(
  165. congress=congress,
  166. congress_ord=ordinal(congress),
  167. member_count=member_count,
  168. )
  169. (out_dir / "README.md").write_text(readme, encoding="utf-8")
  170. # Methodology — copied verbatim from project root; linked from page footers
  171. methodology_src = Path("Methodology.md")
  172. if methodology_src.exists():
  173. shutil.copy2(methodology_src, out_dir / "Methodology.md")
  174. size_mb = dir_size_mb(out_dir)
  175. print(f"build_app: results/{congress}/ ready ({member_count} members, {size_mb:.1f} MB)")
  176. return 0
  177. if __name__ == "__main__":
  178. sys.exit(main())