|
@@ -109,25 +109,28 @@ def _state_code(member):
|
|
|
|
|
|
|
|
|
|
|
|
|
def _latest_chamber(member):
    """Return the chamber of the member's most recent term, or "" if none.

    "Most recent" means the term with the greatest ``startYear``; a missing
    or falsy ``startYear`` is ranked as 0. When several terms share the
    greatest value, the one listed last wins (the sort is stable).

    Args:
        member: member record; its terms are obtained via ``_terms_list``.

    Returns:
        The stripped ``chamber`` string of the selected term, or an empty
        string when there are no terms or the term has no chamber.
    """
    entries = _terms_list(member)
    if not entries:
        return ""

    # Sort a copy so the caller's list is left untouched.
    ordered = list(entries)
    ordered.sort(key=lambda term: term.get("startYear") or 0)
    newest = ordered[-1]

    chamber = newest.get("chamber") or ""
    return chamber.strip()
|
|
|
|
|
|
|
|
|
|
|
|
|
-def _served_dates(member):
|
|
|
|
|
- terms = (member.get("terms") or {}).get("item") or member.get("terms") or []
|
|
|
|
|
|
|
+def _terms_list(member):
|
|
|
|
|
+ terms = member.get("terms")
|
|
|
if isinstance(terms, dict):
|
|
if isinstance(terms, dict):
|
|
|
- terms = terms.get("item") or []
|
|
|
|
|
|
|
+ return terms.get("item") or []
|
|
|
|
|
+ if isinstance(terms, list):
|
|
|
|
|
+ return terms
|
|
|
|
|
+ return []
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+def _served_dates(member):
|
|
|
|
|
+ terms = _terms_list(member)
|
|
|
if not terms:
|
|
if not terms:
|
|
|
return None, None
|
|
return None, None
|
|
|
- starts = []
|
|
|
|
|
- ends = []
|
|
|
|
|
|
|
+ starts, ends = [], []
|
|
|
for t in terms:
|
|
for t in terms:
|
|
|
sy = t.get("startYear")
|
|
sy = t.get("startYear")
|
|
|
ey = t.get("endYear")
|
|
ey = t.get("endYear")
|
|
@@ -140,6 +143,19 @@ def _served_dates(member):
|
|
|
return served_from, served_to
|
|
return served_from, served_to
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _congress_term(member, congress):
    """Find the member's term for the target Congress.

    The first term whose ``"congress"`` value matches ``congress`` is used.
    Besides plain equality, the two values are also compared as stripped
    strings, so ``119`` and ``"119"`` are treated as the same Congress
    (e.g. when one side came from a command line and the other from JSON).

    Args:
        member: member record; its terms are obtained via ``_terms_list``.
        congress: Congress number to look for (int or str).

    Returns:
        A dict with keys ``startYear``, ``endYear``, ``district`` (as a
        string, or ``None`` when the term has no district) and ``chamber``;
        ``None`` when no term matches.
    """
    wanted = None if congress is None else str(congress).strip()

    for entry in _terms_list(member):
        if not isinstance(entry, dict):
            # Defensive: malformed entries cannot carry a congress number.
            continue

        found = entry.get("congress")
        matches = found == congress or (
            found is not None
            and wanted is not None
            and str(found).strip() == wanted
        )
        if not matches:
            continue

        district = entry.get("district")
        return {
            "startYear": entry.get("startYear"),
            "endYear": entry.get("endYear"),
            # Stringified so 0 / at-large stays distinguishable from "absent".
            "district": None if district is None else str(district),
            "chamber": entry.get("chamber"),
        }
    return None
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
def _scan_for_lis(obj):
|
|
def _scan_for_lis(obj):
|
|
|
"""Recursively scan obj for any key matching LIS pattern; return string value or None."""
|
|
"""Recursively scan obj for any key matching LIS pattern; return string value or None."""
|
|
|
if isinstance(obj, dict):
|
|
if isinstance(obj, dict):
|
|
@@ -161,7 +177,7 @@ def _scan_for_lis(obj):
|
|
|
return None
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
-def _normalize_member(m):
|
|
|
|
|
|
|
+def _normalize_member(m, congress=None):
|
|
|
bioguide = (m.get("bioguideId") or "").strip()
|
|
bioguide = (m.get("bioguideId") or "").strip()
|
|
|
if not bioguide:
|
|
if not bioguide:
|
|
|
return None
|
|
return None
|
|
@@ -175,22 +191,30 @@ def _normalize_member(m):
|
|
|
first = m.get("firstName") or ""
|
|
first = m.get("firstName") or ""
|
|
|
last = m.get("lastName") or ""
|
|
last = m.get("lastName") or ""
|
|
|
name = (first + " " + last).strip()
|
|
name = (first + " " + last).strip()
|
|
|
- # If name is "Last, First" prefer invertedOrderName? Use as-is otherwise.
|
|
|
|
|
if "," in name and not m.get("directOrderName"):
|
|
if "," in name and not m.get("directOrderName"):
|
|
|
parts = [p.strip() for p in name.split(",", 1)]
|
|
parts = [p.strip() for p in name.split(",", 1)]
|
|
|
if len(parts) == 2:
|
|
if len(parts) == 2:
|
|
|
name = parts[1] + " " + parts[0]
|
|
name = parts[1] + " " + parts[0]
|
|
|
photo = ((m.get("depiction") or {}).get("imageUrl")) or None
|
|
photo = ((m.get("depiction") or {}).get("imageUrl")) or None
|
|
|
|
|
+ # Per-Congress term — most accurate source of district, start/end year for
|
|
|
|
|
+ # this Congress (matters for mid-term resignations and special-election entrants).
|
|
|
|
|
+ term = _congress_term(m, congress) if congress is not None else None
|
|
|
|
|
+ if term and term.get("district") is not None:
|
|
|
|
|
+ district = term["district"]
|
|
|
|
|
+ term_chamber = (term or {}).get("chamber") or chamber
|
|
|
return {
|
|
return {
|
|
|
"bioguide": bioguide,
|
|
"bioguide": bioguide,
|
|
|
"lis": None,
|
|
"lis": None,
|
|
|
"full_name": name,
|
|
"full_name": name,
|
|
|
"party": _party_letter(m),
|
|
"party": _party_letter(m),
|
|
|
"state": _state_code(m),
|
|
"state": _state_code(m),
|
|
|
- "district": district if chamber.lower() == "house" else None,
|
|
|
|
|
- "chamber": chamber,
|
|
|
|
|
|
|
+ "district": district if (term_chamber or "").lower().startswith("house") else None,
|
|
|
|
|
+ "chamber": term_chamber,
|
|
|
"served_from": served_from,
|
|
"served_from": served_from,
|
|
|
"served_to": served_to,
|
|
"served_to": served_to,
|
|
|
|
|
+ "congress_term": term,
|
|
|
|
|
+ "death_year": m.get("deathYear"),
|
|
|
|
|
+ "current_member": m.get("currentMember"),
|
|
|
"photo_url": photo,
|
|
"photo_url": photo,
|
|
|
"source": "congress.gov/v3",
|
|
"source": "congress.gov/v3",
|
|
|
}
|
|
}
|
|
@@ -232,7 +256,7 @@ def main():
|
|
|
f"0 senators with LIS resolved; {len(warnings)} warnings")
|
|
f"0 senators with LIS resolved; {len(warnings)} warnings")
|
|
|
return 0
|
|
return 0
|
|
|
for m in data.get("members") or []:
|
|
for m in data.get("members") or []:
|
|
|
- norm = _normalize_member(m)
|
|
|
|
|
|
|
+ norm = _normalize_member(m, args.congress)
|
|
|
if norm:
|
|
if norm:
|
|
|
directory[norm["bioguide"]] = norm
|
|
directory[norm["bioguide"]] = norm
|
|
|
nxt = ((data.get("pagination") or {}).get("next")) or None
|
|
nxt = ((data.get("pagination") or {}).get("next")) or None
|
|
@@ -292,13 +316,103 @@ def main():
|
|
|
resolved += 1
|
|
resolved += 1
|
|
|
|
|
|
|
|
out_dir.mkdir(parents=True, exist_ok=True)
|
|
out_dir.mkdir(parents=True, exist_ok=True)
|
|
|
|
|
+ # Fallback: individual lookups for House bioguide IDs that appear in vote
|
|
|
|
|
+ # data but are missing from the per-Congress directory. Catches people who
|
|
|
|
|
+ # were members-elect (appear in opening-day quorum XML) but never seated,
|
|
|
|
|
+ # e.g. Matt Gaetz in the 119th.
|
|
|
|
|
+ house_roster_path = out_dir / "house" / "roster.json"
|
|
|
|
|
+ rescued = 0
|
|
|
|
|
+ if house_roster_path.exists():
|
|
|
|
|
+ house_roster = json.loads(house_roster_path.read_text())
|
|
|
|
|
+ missing = [bg for bg in house_roster
|
|
|
|
|
+ if re.match(r"^[A-Z]\d{6}$", bg) and bg not in directory]
|
|
|
|
|
+ if missing:
|
|
|
|
|
+ print(f"enrich_roster: rescuing {len(missing)} House bioguide(s) missing from bulk directory",
|
|
|
|
|
+ file=sys.stderr)
|
|
|
|
|
+ for bg in missing:
|
|
|
|
|
+ url = f"{API_BASE}/member/{bg}?format=json&api_key={api_key}"
|
|
|
|
|
+ data = _fetch(url, cache_dir, warnings, label=f"member/{bg}")
|
|
|
|
|
+ if data is None:
|
|
|
|
|
+ continue
|
|
|
|
|
+ member = (data.get("member") or {})
|
|
|
|
|
+ norm = _normalize_member(member, args.congress)
|
|
|
|
|
+ if norm:
|
|
|
|
|
+ directory[bg] = norm
|
|
|
|
|
+ rescued += 1
|
|
|
|
|
+
|
|
|
|
|
+ # Replacement-linking pass — pair predecessor↔successor by (state, district)
|
|
|
|
|
+ # within the target Congress. Heuristic: any House seat with >1 member whose
|
|
|
|
|
+ # 119th term touches the Congress window. Sort by startYear (and then by
|
|
|
|
|
+ # served_to is-null) to determine order.
|
|
|
|
|
+ seats = {}
|
|
|
|
|
+ for bg, e in directory.items():
|
|
|
|
|
+ if not (e.get("chamber") or "").lower().startswith("house"):
|
|
|
|
|
+ continue
|
|
|
|
|
+ term = e.get("congress_term") or {}
|
|
|
|
|
+ if term.get("congress") and term["congress"] != args.congress:
|
|
|
|
|
+ continue # shouldn't happen, but safe
|
|
|
|
|
+ state = e.get("state")
|
|
|
|
|
+ district = (term.get("district") if term else None) or e.get("district")
|
|
|
|
|
+ if not state or district is None:
|
|
|
|
|
+ continue
|
|
|
|
|
+ seats.setdefault((state, str(district)), []).append(bg)
|
|
|
|
|
+ pairs = 0
|
|
|
|
|
+ for key, bgs in seats.items():
|
|
|
|
|
+ if len(bgs) < 2:
|
|
|
|
|
+ continue
|
|
|
|
|
+ def sortkey(bg):
|
|
|
|
|
+ e = directory[bg]
|
|
|
|
|
+ term = e.get("congress_term") or {}
|
|
|
|
|
+ start = term.get("startYear") or 9999
|
|
|
|
|
+ # served_to None => still serving => sort last
|
|
|
|
|
+ ended = e.get("served_to") is not None
|
|
|
|
|
+ return (start, 0 if ended else 1)
|
|
|
|
|
+ ordered = sorted(bgs, key=sortkey)
|
|
|
|
|
+ for i in range(len(ordered) - 1):
|
|
|
|
|
+ pred, succ = ordered[i], ordered[i + 1]
|
|
|
|
|
+ directory[pred]["replaced_by"] = succ
|
|
|
|
|
+ directory[succ]["replaces"] = pred
|
|
|
|
|
+ pairs += 1
|
|
|
|
|
+ if pairs:
|
|
|
|
|
+ print(f"enrich_roster: linked {pairs} House predecessor↔successor pair(s)",
|
|
|
|
|
+ file=sys.stderr)
|
|
|
|
|
+
|
|
|
|
|
+ # Per-Congress term + death_year live on the individual /member/{bg} response
|
|
|
|
|
+ # (the bulk listing only carries chamber + startYear). For accurate banner
|
|
|
|
|
+ # copy on replacement chains, fetch the individual record for every member
|
|
|
|
|
+ # who is on either side of a replacement pair. Cached, so re-runs are free.
|
|
|
|
|
+ enrich_targets = set()
|
|
|
|
|
+ for bg, e in directory.items():
|
|
|
|
|
+ if e.get("replaces") or e.get("replaced_by"):
|
|
|
|
|
+ enrich_targets.add(bg)
|
|
|
|
|
+ if enrich_targets:
|
|
|
|
|
+ print(f"enrich_roster: fetching detail for {len(enrich_targets)} replacement-chain members",
|
|
|
|
|
+ file=sys.stderr)
|
|
|
|
|
+ for bg in sorted(enrich_targets):
|
|
|
|
|
+ url = f"{API_BASE}/member/{bg}?format=json&api_key={api_key}"
|
|
|
|
|
+ data = _fetch(url, cache_dir, warnings, label=f"member-detail/{bg}")
|
|
|
|
|
+ if data is None:
|
|
|
|
|
+ continue
|
|
|
|
|
+ member = (data.get("member") or {})
|
|
|
|
|
+ term = _congress_term(member, args.congress)
|
|
|
|
|
+ if term:
|
|
|
|
|
+ directory[bg]["congress_term"] = term
|
|
|
|
|
+ # If individual endpoint reports a per-Congress district, prefer it.
|
|
|
|
|
+ if term.get("district") is not None:
|
|
|
|
|
+ directory[bg]["district"] = term["district"]
|
|
|
|
|
+ if member.get("deathYear") is not None:
|
|
|
|
|
+ directory[bg]["death_year"] = member.get("deathYear")
|
|
|
|
|
+ if member.get("currentMember") is not None:
|
|
|
|
|
+ directory[bg]["current_member"] = member.get("currentMember")
|
|
|
|
|
+
|
|
|
(out_dir / "members_directory.json").write_text(
|
|
(out_dir / "members_directory.json").write_text(
|
|
|
json.dumps(directory, indent=2, sort_keys=True))
|
|
json.dumps(directory, indent=2, sort_keys=True))
|
|
|
(out_dir / "lis_to_bioguide.json").write_text(
|
|
(out_dir / "lis_to_bioguide.json").write_text(
|
|
|
json.dumps(lis_map, indent=2, sort_keys=True))
|
|
json.dumps(lis_map, indent=2, sort_keys=True))
|
|
|
|
|
|
|
|
print(f"enrich_roster: {len(directory)} members directory written; "
|
|
print(f"enrich_roster: {len(directory)} members directory written; "
|
|
|
- f"{resolved} senators with LIS resolved; {len(warnings)} warnings")
|
|
|
|
|
|
|
+ f"{resolved} senators with LIS resolved; {rescued} House rescues; "
|
|
|
|
|
+ f"{pairs} replacements linked; {len(warnings)} warnings")
|
|
|
for w in warnings[:10]:
|
|
for w in warnings[:10]:
|
|
|
print(f" warn: {w}", file=sys.stderr)
|
|
print(f" warn: {w}", file=sys.stderr)
|
|
|
return 0
|
|
return 0
|