fetch_votes.py 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145
  1. #!/usr/bin/env python3
  2. """Fetch all House roll call votes from 119th Congress and analyze Massie's record."""
  3. import urllib.request
  4. import xml.etree.ElementTree as ET
  5. import json
  6. import time
  7. import os
  8. import sys
# Member identifier compared against the ``name-id`` attribute of each
# <legislator> element to locate Massie's recorded vote.
# (presumably his Bioguide ID -- confirm against the Clerk's data)
MASSIE_ID = "M001184"
# Year -> highest roll call number to request for that year; main() fetches
# rolls 1..N inclusive for each entry.
YEARS = {2025: 362, 2026: 191}
# Directory where downloaded roll-call XML files are cached between runs.
CACHE_DIR = "/home/user/polisci/vote_cache"
# NOTE: executed at import time so fetch() can always write into the cache.
os.makedirs(CACHE_DIR, exist_ok=True)
# User-Agent header value sent with every download request.
UA = "Mozilla/5.0 (research; polisci-analysis)"
  14. def fetch(year, roll):
  15. path = f"{CACHE_DIR}/{year}_{roll:03d}.xml"
  16. if os.path.exists(path) and os.path.getsize(path) > 200:
  17. with open(path, "rb") as f:
  18. return f.read()
  19. url = f"https://clerk.house.gov/evs/{year}/roll{roll:03d}.xml"
  20. req = urllib.request.Request(url, headers={"User-Agent": UA})
  21. try:
  22. with urllib.request.urlopen(req, timeout=30) as r:
  23. data = r.read()
  24. with open(path, "wb") as f:
  25. f.write(data)
  26. time.sleep(0.35) # throttle
  27. return data
  28. except Exception as e:
  29. print(f"FAIL {year}/{roll}: {e}", file=sys.stderr)
  30. return None
  31. def parse(data, year, roll):
  32. try:
  33. root = ET.fromstring(data)
  34. except Exception as e:
  35. return None
  36. meta = root.find("vote-metadata")
  37. if meta is None:
  38. return None
  39. def t(tag):
  40. el = meta.find(tag)
  41. return (el.text or "").strip() if el is not None else ""
  42. info = {
  43. "year": year, "roll": roll,
  44. "date": t("action-date"),
  45. "question": t("vote-question"),
  46. "result": t("vote-result"),
  47. "legis_num": t("legis-num"),
  48. "desc": t("vote-desc"),
  49. "majority": t("majority"),
  50. }
  51. # party totals
  52. party_totals = {}
  53. for pt in meta.findall("vote-totals/totals-by-party"):
  54. party = pt.findtext("party", "").strip()
  55. party_totals[party] = {
  56. "yea": int(pt.findtext("yea-total", "0") or 0),
  57. "nay": int(pt.findtext("nay-total", "0") or 0),
  58. "present": int(pt.findtext("present-total", "0") or 0),
  59. "nv": int(pt.findtext("not-voting-total", "0") or 0),
  60. }
  61. info["R"] = party_totals.get("Republican", {"yea":0,"nay":0,"present":0,"nv":0})
  62. info["D"] = party_totals.get("Democratic", {"yea":0,"nay":0,"present":0,"nv":0})
  63. info["I"] = party_totals.get("Independent", {"yea":0,"nay":0,"present":0,"nv":0})
  64. # Massie's vote
  65. massie = None
  66. for rv in root.iter("recorded-vote"):
  67. leg = rv.find("legislator")
  68. if leg is not None and leg.get("name-id") == MASSIE_ID:
  69. v = rv.find("vote")
  70. massie = (v.text or "").strip() if v is not None else None
  71. break
  72. info["massie"] = massie
  73. return info
  74. def classify(v):
  75. """Given parsed vote, return (alignment, blocked_side)."""
  76. r_yea, r_nay = v["R"]["yea"], v["R"]["nay"]
  77. d_yea, d_nay = v["D"]["yea"], v["D"]["nay"]
  78. # Determine each party's majority position
  79. r_pos = "Yea" if r_yea > r_nay else ("Nay" if r_nay > r_yea else "Split")
  80. d_pos = "Yea" if d_yea > d_nay else ("Nay" if d_nay > d_yea else "Split")
  81. m = v["massie"]
  82. if m not in ("Yea", "Nay", "Aye", "No"):
  83. return ("N/A: " + (m or "absent"), None, r_pos, d_pos)
  84. # Normalize Aye/No to Yea/Nay
  85. m_norm = "Yea" if m in ("Yea", "Aye") else "Nay"
  86. helped_r = (r_pos != "Split" and m_norm == r_pos)
  87. helped_d = (d_pos != "Split" and m_norm == d_pos)
  88. if helped_r and helped_d:
  89. align = "Helped Both"
  90. elif helped_r:
  91. align = "Helped Republicans"
  92. elif helped_d:
  93. align = "Helped Democrats"
  94. else:
  95. align = "Helped Neither"
  96. # Blocking analysis: Massie voted against [side]'s majority position, AND that side lost the vote
  97. result = v["result"].lower()
  98. measure_passed = result in ("passed", "agreed to", "adopted")
  99. measure_failed = "fail" in result or "reject" in result or "not agreed" in result or "not passed" in result
  100. blocked = None
  101. # "Dem-backed measure": D majority was Yea -> they wanted it to pass.
  102. # If D wanted Yea, measure failed, and Massie voted Nay -> Massie helped block a Dem-backed measure.
  103. if d_pos == "Yea" and measure_failed and m_norm == "Nay":
  104. if r_pos != "Yea": # only count if Rs didn't also back it -> actually a partisan block
  105. blocked = "Democrat"
  106. # Also if D wanted Nay (to defeat it) but it passed and Massie voted Yea... that's not blocking, that's helping pass
  107. if r_pos == "Yea" and measure_failed and m_norm == "Nay":
  108. if d_pos != "Yea":
  109. blocked = "Republican"
  110. return (align, blocked, r_pos, d_pos)
  111. def main():
  112. all_votes = []
  113. total = sum(YEARS.values())
  114. done = 0
  115. for year, max_roll in YEARS.items():
  116. for roll in range(1, max_roll + 1):
  117. data = fetch(year, roll)
  118. done += 1
  119. if done % 25 == 0:
  120. print(f" fetched {done}/{total}", file=sys.stderr)
  121. if not data:
  122. continue
  123. v = parse(data, year, roll)
  124. if not v:
  125. continue
  126. align, blocked, r_pos, d_pos = classify(v)
  127. v["alignment"] = align
  128. v["blocked"] = blocked
  129. v["r_pos"] = r_pos
  130. v["d_pos"] = d_pos
  131. all_votes.append(v)
  132. with open("/home/user/polisci/votes.json", "w") as f:
  133. json.dump(all_votes, f)
  134. print(f"Saved {len(all_votes)} votes", file=sys.stderr)
  135. if __name__ == "__main__":
  136. main()