在科研中,作者之间的合作关系往往蕴含着丰富的信息,比如谁是核心作者、哪些团队合作紧密,以及谁是跨学科合作的桥梁。

通过解析 BibTeX 文件中的作者信息,可以提取作者之间的合作关系,并生成可视化图表。

脚本内容(保存为 coauthor_network.py):
import re
import json
from collections import defaultdict, Counter
from itertools import combinations
from pathlib import Path
import argparse


def normalize_author_name(name: str) -> str:
    """Return a canonical "First LAST" spelling of a single author name.

    The result is used as a node identifier, so different spellings of the
    same person should collapse to the same string:

    - "Smith, John" and "John Smith" both become "John SMITH".
    - Dots are treated as separators, so "J.R. Smith" and "J. R. Smith"
      both become "J R SMITH".
    - Runs of whitespace (including line breaks) collapse to one space.
    - Each given-name word is capitalized; the last name is upper-cased.

    Names that cannot be split into a given part and a family part (a single
    word, or a comma form with more or fewer than two parts such as
    "Smith, Jr, John") are returned cleaned but otherwise unchanged.

    Args:
        name: Raw author string as it appears in the BibTeX ``author`` field.

    Returns:
        The normalized name; an empty string if ``name`` is blank.
    """
    # Replace dots with spaces (instead of deleting them) so that adjacent
    # initials do not fuse into one word, then collapse all whitespace.
    cleaned = " ".join(name.replace(".", " ").split())

    if "," in cleaned:
        parts = [part.strip() for part in cleaned.split(",")]
        if len(parts) != 2:
            return cleaned
        last, first = parts
    else:
        words = cleaned.split()
        if len(words) < 2:
            return cleaned
        *given, last = words
        first = " ".join(given)

    first = " ".join(word.capitalize() for word in first.split())
    # strip() avoids a dangling space when one side of the comma is empty.
    return f"{first} {last.upper()}".strip()


# Start of an author field: the field name, "=", and the opening delimiter.
# \b keeps field names that merely end in "author" (e.g. "coauthor") out.
_AUTHOR_FIELD_RE = re.compile(r"\bauthor\s*=\s*([{\"])", re.I)

# Authors are separated by the word "and" surrounded by any whitespace,
# which includes the line breaks of a wrapped author list.
_AUTHOR_SEPARATOR_RE = re.compile(r"\s+and\s+")


def _extract_author_field(entry: str):
    """Return the raw text of the author field in one entry, or None.

    Braces are matched by depth so nested groups such as ``M{\\"u}ller`` do
    not end the field early. Both ``author = {...}`` and ``author = "..."``
    are accepted. None is returned when there is no author field or its
    closing delimiter is missing.
    """
    match = _AUTHOR_FIELD_RE.search(entry)
    if match is None:
        return None

    start = match.end()
    quoted = match.group(1) == '"'
    # For a braced value the opening brace has already been consumed.
    depth = 0 if quoted else 1
    for pos in range(start, len(entry)):
        ch = entry[pos]
        if ch == "{":
            depth += 1
        elif ch == "}":
            depth -= 1
            if depth == 0 and not quoted:
                return entry[start:pos]
        elif ch == '"' and quoted and depth == 0:
            return entry[start:pos]
    return None


def parse_bibtex_authors(bib_file: str):
    """Parse the author lists of all entries in a BibTeX file.

    The file is read as UTF-8 and split into entries on "@". For each entry
    that has an author field, the field is split into individual names and
    every name is passed through ``normalize_author_name``.

    Args:
        bib_file: Path to the BibTeX file.

    Returns:
        A list with one list of normalized author names per entry, e.g.
        ``[["John SMITH", "Jane DOE"], ...]``. Entries without an author
        field, or whose author field is empty, are skipped.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    with open(bib_file, encoding="utf-8") as f:
        content = f.read()

    papers_authors = []
    # Text before the first "@" is preamble, not an entry.
    for entry in content.split("@")[1:]:
        authors_raw = _extract_author_field(entry)
        if authors_raw is None:
            continue
        authors = [
            normalize_author_name(raw_name)
            for raw_name in _AUTHOR_SEPARATOR_RE.split(authors_raw.strip())
            if raw_name.strip()
        ]
        if authors:
            papers_authors.append(authors)

    return papers_authors


def build_coauthor_graph_with_weights(papers_authors):
    """Build an undirected, weighted co-author graph.

    Every pair of distinct authors on the same paper is connected. An author
    listed more than once on a paper is counted once for that paper, so no
    self-loops are created and edge weights are not inflated.

    Args:
        papers_authors: Iterable of author-name lists, one list per paper.

    Returns:
        A tuple ``(graph, edge_counter)``:
        - graph: ``defaultdict(set)`` mapping each author to the set of
          their co-authors. Authors with no co-author do not appear.
        - edge_counter: ``Counter`` mapping the sorted pair ``(a, b)`` to
          the number of papers the two authors share.
    """
    graph = defaultdict(set)
    edge_counter = Counter()
    for authors in papers_authors:
        # dict.fromkeys removes duplicates while keeping first-seen order.
        unique_authors = list(dict.fromkeys(authors))
        for a, b in combinations(unique_authors, 2):
            graph[a].add(b)
            graph[b].add(a)
            # Sort the pair so (a, b) and (b, a) share one counter key.
            edge_counter[tuple(sorted((a, b)))] += 1
    return graph, edge_counter


def export_to_html(graph, counter, edge_counter, output_file="coauthors.html"):
    """Write the co-author graph as a standalone HTML page using D3.js.

    The page renders a force-directed layout. Node radius grows with the
    author's paper count, edge thickness grows with the number of shared
    papers, and edges with more than one shared paper carry a numeric label.
    D3 is loaded from https://d3js.org, so the page needs network access
    when it is opened.

    Args:
        graph: Mapping of author name -> set of co-author names.
        counter: Mapping of author name -> number of papers (node size).
        edge_counter: Mapping of sorted ``(a, b)`` pair -> shared papers.
        output_file: Path of the HTML file to write.

    Returns:
        None. The file is written and its absolute path is printed.
    """
    nodes = [{"id": a, "group": 1, "size": counter[a]} for a in graph]
    # Each undirected edge is stored in both adjacency sets; "a < b" keeps
    # one copy, and (a, b) is then already the sorted edge_counter key.
    # Sorting the neighbours makes the output independent of set order.
    links = [
        {"source": a, "target": b, "value": edge_counter[(a, b)]}
        for a in graph
        for b in sorted(graph[a])
        if a < b
    ]

    data = {"nodes": nodes, "links": links}

    # The JSON is embedded inside a <script> element. json.dumps emits "<"
    # only inside string values, so escaping it there is lossless and stops
    # a name containing "</script>" from terminating the script early.
    graph_json = json.dumps(data).replace("<", "\\u003c")

    html_template = f"""
<!DOCTYPE html>
<meta charset="utf-8">
<style>
svg {{
border: 1px solid #ccc;
background-color: #fff;
}}
.links line {{
stroke: #999;
stroke-opacity: 0.6;
}}
.nodes circle {{
stroke: #fff;
stroke-width: 1.5px;
}}
text {{
font-family: sans-serif;
font-size: 10px;
}}
</style>
<body>
<div style="display:flex; flex-direction:column; align-items:center; height:100vh; margin:0;">
<h1 style="margin-bottom:10px;">Co-author Network</h1>
<svg width="960" height="600"></svg>
</div>
<script src="https://d3js.org/d3.v7.min.js"></script>
<script>
var graph = {graph_json};

var svg = d3.select("svg"),
width = +svg.attr("width"),
height = +svg.attr("height");

var color = d3.scaleOrdinal(d3.schemeCategory10);
var container = svg.append("g");

var link = container.append("g")
.attr("class", "links")
.selectAll("line")
.data(graph.links)
.enter().append("line")
.attr("stroke-width", d => Math.sqrt(d.value)); // thickness by coauthored papers

var linkLabel = container.append("g")
.selectAll("text")
.data(graph.links.filter(d => d.value > 1))
.enter().append("text")
.text(d => d.value)
.attr("font-size", 8)
.attr("fill", "#555");

var node = container.append("g")
.attr("class", "nodes")
.selectAll("circle")
.data(graph.nodes)
.enter().append("circle")
.attr("r", d => 5 + d.size)
.attr("fill", d => color(d.group));

var label = container.append("g")
.selectAll("text")
.data(graph.nodes)
.enter().append("text")
.text(d => d.id)
.attr("x", 6)
.attr("y", 3);

node.append("title")
.text(d => d.id);

var simulation = d3.forceSimulation(graph.nodes)
.force("link", d3.forceLink(graph.links).id(d => d.id).distance(80))
.force("charge", d3.forceManyBody().strength(d => -80-40 * Math.sqrt(d.size)))
.force("center", d3.forceCenter(width / 2, height / 2))
.force("x", d3.forceX(width/2).strength(0.05))
.force("y", d3.forceY(height/2).strength(0.05))
.force("collide", d3.forceCollide(d => 5 + d.size + 2));

// Attach the drag behavior only after the simulation exists, so the
// handlers capture a live simulation instead of undefined.
node.call(drag(simulation));

simulation.on("tick", () => {{
link
.attr("x1", d => d.source.x)
.attr("y1", d => d.source.y)
.attr("x2", d => d.target.x)
.attr("y2", d => d.target.y);

linkLabel
.attr("x", d => (d.source.x + d.target.x)/2 )
.attr("y", d => (d.source.y + d.target.y)/2 );

node
.attr("cx", d => d.x)
.attr("cy", d => d.y);

label
.attr("x", d => d.x + 6)
.attr("y", d => d.y + 3);
}});

function drag(simulation) {{
function dragstarted(event, d) {{
if (!event.active) simulation.alphaTarget(0.3).restart();
d.fx = d.x;
d.fy = d.y;
}}
function dragged(event, d) {{
d.fx = event.x;
d.fy = event.y;
}}
function dragended(event, d) {{
if (!event.active) simulation.alphaTarget(0);
d.fx = null;
d.fy = null;
}}
return d3.drag()
.on("start", dragstarted)
.on("drag", dragged)
.on("end", dragended);
}}

var zoom = d3.zoom()
.scaleExtent([0.1, 4])
.on("zoom", (event) => {{
container.attr("transform", event.transform);
}});
svg.call(zoom);

simulation.on("end", () => {{
const bounds = container.node().getBBox();
// Nothing drawn (empty graph): skip the fit to avoid dividing by zero.
if (!bounds.width || !bounds.height) return;
const scale = 0.85 / Math.max(bounds.width / width, bounds.height / height);
const translate = [
width / 2 - scale * (bounds.x + bounds.width / 2),
height / 2 - scale * (bounds.y + bounds.height / 2)
];
svg.transition().duration(750).call(
zoom.transform,
d3.zoomIdentity.translate(translate[0], translate[1]).scale(scale)
);
}});
</script>
</body>
"""
    output_path = Path(output_file).resolve()
    output_path.write_text(html_template, encoding="utf-8")

    print(f"\nVisualization generated.\nOpen in your browser: {output_path.as_posix()}")


def main():
    """Command-line entry point.

    Reads the BibTeX file named on the command line, prints the ten authors
    with the most papers, and writes the co-author network to the HTML file
    given by ``-o/--output`` (default ``coauthors.html``). If parsing yields
    nothing, a message is printed and no file is written.
    """
    cli = argparse.ArgumentParser(
        description="Generate co-author network from a BibTeX file."
    )
    cli.add_argument("bibfile", help="Path to the input BibTeX file")
    cli.add_argument(
        "-o", "--output", default="coauthors.html", help="Output HTML file"
    )
    options = cli.parse_args()

    author_lists = parse_bibtex_authors(options.bibfile)
    if not author_lists:
        print("No authors found in the BibTeX file.")
        return

    # Tally how many papers each author appears on.
    paper_counts = Counter()
    for author_list in author_lists:
        paper_counts.update(author_list)

    print("=== Top 10 authors by number of papers ===")
    for who, how_many in paper_counts.most_common(10):
        print(f"{who}: {how_many} papers")

    coauthor_graph, shared_papers = build_coauthor_graph_with_weights(author_lists)
    export_to_html(coauthor_graph, paper_counts, shared_papers, options.output)


# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()

使用方法

提供 BibTeX 文件即可,可以指定输出的 HTML 文件名(默认名为 coauthors.html)。

python coauthor_network.py your_file.bib -o coauthors.html

直接在浏览器中打开 HTML 文件即可查看。

效果如下