基于 BibTeX 构建可视化合作网络

在科研中，作者之间的合作关系往往蕴含了很多信息，比如谁是核心作者、哪些团队合作紧密，以及谁是跨学科合作的桥梁。
通过解析 BibTeX 文件中的作者信息，可以获取作者之间的合作关系，并生成可视化图表。
脚本内容

import re
import json
from collections import defaultdict, Counter
from itertools import combinations
from pathlib import Path
import argparse


def normalize_author_name(name: str) -> str:
    """
    Normalize an author name so spelling variants map to the same string.

    - "Smith, John" -> "John SMITH"
    - "john smith"  -> "John SMITH"
    - Dots are turned into spaces and whitespace is collapsed, so
      "J.K. Rowling" and "J. K. Rowling" both become "J K ROWLING"
    - BibTeX protective braces ("{Smith}, John") are dropped
    - Given names are capitalized (each part of a hyphenated name
      separately), the last name is uppercased

    Names that cannot be split into given/last parts (a single token, or a
    comma form with more or fewer than two parts) are returned cleaned but
    otherwise unchanged.
    """
    # Replace dots with spaces instead of deleting them: deleting fuses
    # "J.K." into "JK", which no longer matches "J. K.".
    cleaned = " ".join(
        name.replace(".", " ").replace("{", "").replace("}", "").split()
    )

    if "," in cleaned:
        parts = [p.strip() for p in cleaned.split(",")]
        if len(parts) != 2:
            return cleaned
        last, first = parts
    else:
        tokens = cleaned.split()
        if len(tokens) < 2:
            return cleaned
        first, last = " ".join(tokens[:-1]), tokens[-1]

    # str.capitalize() lowercases everything after the first character, so
    # hyphenated given names are capitalized piece by piece.
    first = " ".join(
        "-".join(piece.capitalize() for piece in word.split("-"))
        for word in first.split()
    )
    # strip() covers a missing given or last name (e.g. "Smith,").
    return f"{first} {last.upper()}".strip()


def _read_bibtex_value(text, pos):
    """Return the brace- or quote-delimited value opening at text[pos], else None."""
    if pos >= len(text) or text[pos] not in '{"':
        return None
    opener = text[pos]
    depth = 0
    for i in range(pos, len(text)):
        ch = text[i]
        if ch == "{":
            depth += 1
        elif ch == "}":
            depth -= 1
            if opener == "{" and depth == 0:
                return text[pos + 1 : i]
        elif ch == '"' and opener == '"' and depth == 0 and i > pos:
            return text[pos + 1 : i]
    return None  # unbalanced delimiters


def _split_author_list(raw):
    """Split an author field on "and" separators that sit outside braces."""
    names = []
    begin = 0
    for sep in re.finditer(r"\s+and\s+", raw):
        # An "and" inside braces (e.g. "{Barnes and Noble}") is part of a name.
        if raw.count("{", 0, sep.start()) != raw.count("}", 0, sep.start()):
            continue
        names.append(raw[begin : sep.start()])
        begin = sep.end()
    names.append(raw[begin:])
    return names


def parse_bibtex_authors(bib_file: str):
    """
    Parse authors from a BibTeX file.

    For every entry, the first ``author`` field is read (brace- or
    quote-delimited, nested braces allowed), split into individual names and
    each name is passed through ``normalize_author_name``. The BibTeX
    "and others" marker is dropped, and entries that yield no author are
    skipped.

    Args:
        bib_file: Path of the BibTeX file, read as UTF-8.

    Returns:
        A list of lists: [[author1, author2, ...], ...], one inner list per
        entry that has at least one author.
    """
    with open(bib_file, encoding="utf-8") as f:
        content = f.read()

    papers_authors = []

    # Split on entry headers ("@article{", "@book(") rather than on a bare
    # "@", so an e-mail address inside a field does not cut an entry in half.
    for entry in re.split(r"@\w+\s*[{(]", content)[1:]:
        # \b keeps fields such as "bookauthor" or "shortauthor" from matching.
        field = re.search(r"\bauthor\s*=\s*", entry, re.I)
        if not field:
            continue
        authors_raw = _read_bibtex_value(entry, field.end())
        if authors_raw is None:
            continue
        authors = [
            normalize_author_name(a)
            for a in _split_author_list(authors_raw)
            # "others" is BibTeX's "et al." marker, not a person.
            if a.strip() and a.strip().lower() != "others"
        ]
        if authors:
            papers_authors.append(authors)

    return papers_authors


def build_coauthor_graph_with_weights(papers_authors):
    """
    Build a co-author graph (undirected) with edge weights.

    Args:
        papers_authors: Iterable of author-name lists, one list per paper.

    Returns:
      - graph: defaultdict mapping each author to the set of their co-authors
      - edge_counter: Counter mapping tuple(sorted((a, b))) to the number of
        papers a and b wrote together

    An author listed more than once on the same paper is counted once for
    that paper. Authors who only appear on single-author papers have no edge
    and therefore do not appear in the graph.
    """
    graph = defaultdict(set)
    edge_counter = Counter()
    for authors in papers_authors:
        # De-duplicate while keeping order: a repeated name would otherwise
        # create a self-loop and double-count every edge it takes part in.
        unique_authors = list(dict.fromkeys(authors))
        for a, b in combinations(unique_authors, 2):
            graph[a].add(b)
            graph[b].add(a)
            edge_counter[tuple(sorted((a, b)))] += 1
    return graph, edge_counter


def export_to_html(graph, counter, edge_counter, output_file="coauthors.html"):
    """
    Export the co-author graph to an HTML file with D3.js force-directed layout.

    Node radius grows with the author's paper count; edge thickness grows
    with the number of co-authored papers, and edges with more than one
    shared paper also carry that number as a label. The page loads D3 v7
    from d3js.org, so viewing it requires network access.

    Args:
        graph: Mapping author -> set of co-authors.
        counter: Mapping author -> number of papers (used for node size).
        edge_counter: Mapping (a, b) with a < b -> number of shared papers.
        output_file: Path of the HTML file to write (UTF-8).

    Side effects:
        Writes the HTML file and prints its resolved path.
    """
    nodes = [{"id": a, "group": 1, "size": counter[a]} for a in graph]
    # Each undirected edge is stored in both adjacency sets; "a < b" keeps
    # one copy, and (a, b) is then already the sorted key of edge_counter.
    links = [
        {"source": a, "target": b, "value": edge_counter[(a, b)]}
        for a in graph
        for b in graph[a]
        if a < b
    ]

    # The JSON is embedded inside a script element. Author names come from
    # the input file, so "<" is escaped to keep a name containing a closing
    # script tag from terminating the script block. "<" only occurs inside
    # JSON strings, where \u003c is an equivalent spelling.
    payload = json.dumps({"nodes": nodes, "links": links}).replace("<", "\\u003c")

    html_template = f"""
<!DOCTYPE html>
<meta charset="utf-8">
<style>
  svg {{
    border: 1px solid #ccc;
    background-color: #fff;
  }}
  .links line {{
    stroke: #999;
    stroke-opacity: 0.6;
  }}
  .nodes circle {{
    stroke: #fff;
    stroke-width: 1.5px;
  }}
  text {{
    font-family: sans-serif;
    font-size: 10px;
  }}
</style>
<body>
  <div style="display:flex; flex-direction:column; align-items:center; height:100vh; margin:0;">
    <h1 style="margin-bottom:10px;">Co-author Network</h1>
    <svg width="960" height="600"></svg>
  </div>
<script src="https://d3js.org/d3.v7.min.js"></script>
<script>
  var graph = {payload};

  var svg = d3.select("svg"),
      width = +svg.attr("width"),
      height = +svg.attr("height");

  var color = d3.scaleOrdinal(d3.schemeCategory10);
  var container = svg.append("g");

  // Create the simulation first: the drag behaviour attached to the nodes
  // below captures it, and would capture undefined if it were built later.
  var simulation = d3.forceSimulation(graph.nodes)
      .force("link", d3.forceLink(graph.links).id(d => d.id).distance(80))
      .force("charge", d3.forceManyBody().strength(d => -80-40 * Math.sqrt(d.size)))
      .force("center", d3.forceCenter(width / 2, height / 2))
      .force("x", d3.forceX(width/2).strength(0.05))
      .force("y", d3.forceY(height/2).strength(0.05))
      .force("collide", d3.forceCollide(d => 5 + d.size + 2));

  var link = container.append("g")
      .attr("class", "links")
    .selectAll("line")
    .data(graph.links)
    .enter().append("line")
      .attr("stroke-width", d => Math.sqrt(d.value)); // thickness by coauthored papers

  var linkLabel = container.append("g")
      .selectAll("text")
      .data(graph.links.filter(d => d.value > 1))
      .enter().append("text")
      .text(d => d.value)
      .attr("font-size", 8)
      .attr("fill", "#555");

  var node = container.append("g")
      .attr("class", "nodes")
    .selectAll("circle")
    .data(graph.nodes)
    .enter().append("circle")
      .attr("r", d => 5 + d.size)
      .attr("fill", d => color(d.group))
      .call(drag(simulation));

  var label = container.append("g")
      .selectAll("text")
      .data(graph.nodes)
      .enter().append("text")
      .text(d => d.id)
      .attr("x", 6)
      .attr("y", 3);

  node.append("title")
      .text(d => d.id);

  simulation.on("tick", () => {{
    link
        .attr("x1", d => d.source.x)
        .attr("y1", d => d.source.y)
        .attr("x2", d => d.target.x)
        .attr("y2", d => d.target.y);

    linkLabel
        .attr("x", d => (d.source.x + d.target.x)/2 )
        .attr("y", d => (d.source.y + d.target.y)/2 );

    node
        .attr("cx", d => d.x)
        .attr("cy", d => d.y);

    label
        .attr("x", d => d.x + 6)
        .attr("y", d => d.y + 3);
  }});

  function drag(simulation) {{
    function dragstarted(event, d) {{
      if (!event.active) simulation.alphaTarget(0.3).restart();
      d.fx = d.x;
      d.fy = d.y;
    }}
    function dragged(event, d) {{
      d.fx = event.x;
      d.fy = event.y;
    }}
    function dragended(event, d) {{
      if (!event.active) simulation.alphaTarget(0);
      d.fx = null;
      d.fy = null;
    }}
    return d3.drag()
        .on("start", dragstarted)
        .on("drag", dragged)
        .on("end", dragended);
  }}

  var zoom = d3.zoom()
      .scaleExtent([0.1, 4])
      .on("zoom", (event) => {{
        container.attr("transform", event.transform);
      }});
  svg.call(zoom);

  // Once the layout settles, zoom so the whole network fills ~85% of the view.
  simulation.on("end", () => {{
    const bounds = container.node().getBBox();
    const extent = Math.max(bounds.width / width, bounds.height / height);
    if (!(extent > 0)) return; // nothing drawn: avoid dividing by zero
    const scale = 0.85 / extent;
    const translate = [
      width / 2 - scale * (bounds.x + bounds.width / 2),
      height / 2 - scale * (bounds.y + bounds.height / 2)
    ];
    svg.transition().duration(750).call(
      zoom.transform,
      d3.zoomIdentity.translate(translate[0], translate[1]).scale(scale)
    );
  }});
</script>
</body>
"""
    output_path = Path(output_file).resolve()
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(html_template)

    print(f"\nVisualization generated.\nOpen in your browser: {output_path.as_posix()}")


def main():
    """
    Command-line entry point.

    Reads the BibTeX path (and optional ``-o/--output`` HTML path) from the
    command line, prints the ten authors with the most papers, then builds
    the co-author graph and writes the HTML visualization. Prints a message
    and returns early when the file yields no authors.
    """
    cli = argparse.ArgumentParser(
        description="Generate co-author network from a BibTeX file."
    )
    cli.add_argument("bibfile", help="Path to the input BibTeX file")
    cli.add_argument(
        "-o", "--output", default="coauthors.html", help="Output HTML file"
    )
    options = cli.parse_args()

    author_lists = parse_bibtex_authors(options.bibfile)
    if not author_lists:
        print("No authors found in the BibTeX file.")
        return

    # Tally how many papers each author appears on, paper by paper.
    paper_counts = Counter()
    for names in author_lists:
        paper_counts.update(names)

    print("=== Top 10 authors by number of papers ===")
    for name, total in paper_counts.most_common(10):
        print(f"{name}: {total} papers")

    coauthors, weights = build_coauthor_graph_with_weights(author_lists)
    export_to_html(coauthors, paper_counts, weights, options.output)

# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
使用方法

提供 BibTeX 文件即可，可以指定输出的 HTML 文件名（默认名为 coauthors.html）。
python coauthor_network.py your_file.bib -o coauthors.html
直接在浏览器中打开 HTML 文件即可查看。
效果如下