Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
45 commits
Select commit Hold shift + click to select a range
9a23922
joint first try
Jean-Baptiste-Camps Jan 30, 2026
03ee203
joint inference working a bit
Jean-Baptiste-Camps Feb 2, 2026
d3a35e2
model checks
Jean-Baptiste-Camps Feb 2, 2026
d34db08
break
Jean-Baptiste-Camps Feb 2, 2026
a405000
unified model
Jean-Baptiste-Camps Feb 2, 2026
99ad0b8
unified model trained
Jean-Baptiste-Camps Feb 3, 2026
eef8302
fix typo
Jean-Baptiste-Camps Feb 3, 2026
c3efc09
new model
Jean-Baptiste-Camps Feb 3, 2026
5ba7200
new figs
Jean-Baptiste-Camps Feb 3, 2026
8ba18c7
duplicate models
Jean-Baptiste-Camps Feb 3, 2026
23a119a
merge
Jean-Baptiste-Camps Feb 3, 2026
ca1de1d
merge
Jean-Baptiste-Camps Feb 3, 2026
4db658e
alt plots
Jean-Baptiste-Camps Feb 3, 2026
42fd7d2
restored local model
Jean-Baptiste-Camps Feb 3, 2026
0033655
plots
Jean-Baptiste-Camps Feb 3, 2026
a89240e
limits
Jean-Baptiste-Camps Feb 3, 2026
b51f06c
limites
Jean-Baptiste-Camps Feb 3, 2026
ea19d52
run 3
Jean-Baptiste-Camps Feb 4, 2026
9545717
mcmc methods 1
Jean-Baptiste-Camps Feb 4, 2026
4342b91
params
Jean-Baptiste-Camps Feb 4, 2026
bd651dc
logs
Jean-Baptiste-Camps Feb 4, 2026
6cce14d
setting the benchmark
Jean-Baptiste-Camps Feb 4, 2026
5e26d85
first iter
Jean-Baptiste-Camps Feb 4, 2026
84416d1
first iter
Jean-Baptiste-Camps Feb 4, 2026
92bf2da
benchmark
Jean-Baptiste-Camps Feb 4, 2026
d58ea7f
update stemmata
Jean-Baptiste-Camps Feb 6, 2026
cc66ddc
more summary stats
Jean-Baptiste-Camps Feb 6, 2026
0788cc4
new sumstats
Jean-Baptiste-Camps Feb 6, 2026
933bdb5
more summary stats
Jean-Baptiste-Camps Feb 6, 2026
288e9dc
bug on server
Jean-Baptiste-Camps Feb 9, 2026
53ef707
debug
Jean-Baptiste-Camps Feb 9, 2026
17ed796
better dates aliscans
Jean-Baptiste-Camps Feb 9, 2026
2e710e5
debug
Jean-Baptiste-Camps Feb 9, 2026
b9a9b7a
checking functions
Jean-Baptiste-Camps Feb 9, 2026
685c4f5
fixed code for f2 and f1
Jean-Baptiste-Camps Feb 9, 2026
e947e68
typo
Jean-Baptiste-Camps Feb 9, 2026
8aa3a1c
more stemmata
Jean-Baptiste-Camps Feb 9, 2026
ad1728d
new graphs
Jean-Baptiste-Camps Feb 10, 2026
267cd5a
new MCMC params
Jean-Baptiste-Camps Feb 10, 2026
a41492b
new config
Jean-Baptiste-Camps Feb 10, 2026
06402a9
new plot
Jean-Baptiste-Camps Feb 10, 2026
0dbde20
new plot
Jean-Baptiste-Camps Feb 10, 2026
1f343be
final plot
Jean-Baptiste-Camps Feb 10, 2026
604bdbb
avoid overfitting
Jean-Baptiste-Camps Feb 10, 2026
f129b4c
models
Jean-Baptiste-Camps Feb 10, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added __pycache__/birth_death_utils.cpython-311.pyc
Binary file not shown.
248 changes: 246 additions & 2 deletions birth_death_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
import matplotlib.pyplot as plt
from networkx.algorithms.traversal.depth_first_search import dfs_tree
import random

from collections import Counter
import re

def leaves(graph):
"""
Expand Down Expand Up @@ -81,6 +82,107 @@ def subtree_size(graph, node):
return len(dfs_tree(graph,node).nodes())


def generate_tree_unified(lda0, mu, decay, decim, Tact, Tinact, Tcrisis):
"""
Generate a tree (arbre réel) according to birth death model.

Parameters
----------
lda0 : float
birth rate of new node per node per iteration
mu : float
death rate of nodes per node per per iteration
decay: float
decay rate
decim: float
decimation rate
Tact : int
number of iterations of the active reproduction phase
Tinact : int
number of iterations of the pure death phase (lda is set to 0)
Tcrisis: int
number of iterations of the time of the crisis

Returns
-------
G : nx.DiGraph()
networkx graph object of the generated tree with following node attributes:
'state' : boolean, True if node living at the end of simulation
'birth_time' : int
'death_time' : int

"""
currentID = 0
G = nx.DiGraph()
G.add_node(currentID)
living_nodes = set([0])

birth_time = {0: 0}
death_time = {}

pop = 1

t = 0
crisis_happened = False

while t < Tact:
lda1 = (lda0 * (1-decay)) + ( (2 * lda0 / Tact) * (Tact - t) * decay)
prob_event = lda1 + mu
prob_birth = lda1 / (lda1 + mu)
prob_death = mu / (lda1 + mu)

if pop == 0:
t = Tact
break
next_event = np.random.exponential(scale=1. / (prob_event * pop))
if next_event > Tact:
t = Tact
break

t += next_event
r = np.random.rand()
current_node = np.random.choice(list(living_nodes))
if r < prob_birth:
currentID += 1
G.add_node(currentID)
G.add_edge(current_node, currentID)
living_nodes.add(currentID)
pop += 1
birth_time[currentID] = t
else:
living_nodes.remove(current_node)
pop -= 1
death_time[current_node] = t

if t > Tcrisis and not crisis_happened:
decimated_nodes = random.sample(list(living_nodes), int(decim * pop))
for n in decimated_nodes:
living_nodes.remove(int(n))
death_time[n] = t
pop -= 1
crisis_happened = True

while t < Tact + Tinact:
if pop == 0:
t = Tact + Tinact
break
next_event = np.random.exponential(scale=1. / (mu * pop))
if next_event > Tact + Tinact:
t = Tact + Tinact
break
t += next_event
current_node = np.random.choice(list(living_nodes))
living_nodes.remove(current_node)
pop -= 1
death_time[current_node] = t

living = {n: (n in living_nodes) for n in G.nodes()}
nx.set_node_attributes(G, living, 'state')
nx.set_node_attributes(G, birth_time, 'birth_time')
nx.set_node_attributes(G, death_time, 'death_time')

return G

def generate_tree(lda, mu, Nact, Ninact):
"""
Generate a tree (arbre réel) according to birth death model.
Expand Down Expand Up @@ -559,4 +661,146 @@ def load_from_OpenStemmata(file):

# remove non-branching unattested nodes
st = generate_stemma(G)
return st
return st


## Functions for getting stats
def compute_summary_stats(g):
"""
Generate a vector of summary statistics from a graph generated by birth-death simulation

Parameters
----------
g : nx.DiGraph()
tree graph generated by birth_death_utils.ct_bd_tree

Returns
-------
list
vector of summary statistics characterizing a single tradition
"""
n_living = list(nx.get_node_attributes(g, 'state').values()).count(True)

if n_living == 0:
return None

if n_living > 0:
birth_times_trad = []
for n in g.nodes():
if g.nodes[n]['state']:
birth_times_trad.append(g.nodes[n]['birth_time'])
timelapse = int(max(birth_times_trad) - min(birth_times_trad))
earliest_wit = int(min(birth_times_trad))
median_wit = int(np.median(birth_times_trad))
latest_wit = int(max(birth_times_trad))

if n_living == 1:
return [1, timelapse, earliest_wit, median_wit, latest_wit, -1, -1, -1, -1, -1, -1, -1]

if n_living == 2:
return [2, timelapse, earliest_wit, median_wit, latest_wit, -1, -1, -1, -1, -1, -1, -1]

if n_living >= 3:
degrees = []
direct_filiation_nb = 0
arch_dists = []
st = generate_stemma(g)
archetype = root(st)

for n in st.nodes():
degrees.append(st.out_degree(n))

if n != archetype:
father = list(st.predecessors(n))[0]
if st.nodes[n]['state'] and st.nodes[father]['state']:
direct_filiation_nb += 1
if st.nodes[n]['state']:
birth_times_trad.append(st.nodes[n]['birth_time'])
arch_dists.append(len(nx.shortest_path(st, source=archetype, target=n)))

deg_dist = Counter(degrees)
deg1 = deg_dist[1]
deg2 = deg_dist[2]
deg3 = deg_dist[3]
deg4 = deg_dist[4]
depth = max(arch_dists)
n_nodes = len(list(st.nodes()))

return [
n_living,
timelapse,
earliest_wit,
median_wit,
latest_wit,
n_nodes,
direct_filiation_nb,
deg1,
deg2,
deg3,
deg4,
depth
]

def convert_date(x):
pattern1 = re.compile('[0-9][0-9][0-9][0-9]')
pattern2 = re.compile('[0-9][0-9][0-9][0-9]-[0-9][0-9][0-9][0-9]')

if bool(pattern1.fullmatch(x)):
return int(x)
if bool(pattern2.fullmatch(x)):
xx = x.split('-')
return (int(xx[0]),int(xx[1]))

def top(x):
'''
Upper bound on witness date (single year or range)
'''
match x:
case (x1, x2):
return x2
case _:
return x

def bottom(x):
'''
Lower bound on witness date (single year or range)
'''
match x:
case (x1, x2):
return x1
case _:
return x


def expected_abs_diff_degenerate(c, a, b):
'''
Returns the expectation value of the timelapse between one date given as range
and one other as a single year
'''
if a == b:
return abs(a - c)
if a <= c <= b:
return ((c-a)**2 + (b-c)**2) / (2*(b-a))
elif c < a:
return (a+b)/2 - c
else:
return c - (a+b)/2

def expected_abs_diff(a, b, c, d):
'''
Return the expectation value of th a=1151, b=1200, c=1241, d=1260e timelapse between two dates given as
intervals
'''
if a == b:
return expected_abs_diff_degenerate(a, c, d)
if c == d:
return expected_abs_diff_degenerate(c, a, b)
elif a < b < c < d:
return (c+d-b-a)/2
elif a <= c <= d < b:
return (1/(b-a)) * ((1/2)*((d-a)*(c-a)+(b-d)*(b-c)) + (1/3)*(d-c)**2)
elif a <= c <= b <= d:
return (1/(b-a)) * ((1/2)* ((c-a)*(d-a) + (b-c)*(d-b)) + (1/(3*(d-c)))*(b-c)**3 )
else:
print(f"{a},{b} -- {c},{d} ")
raise ValueError("Must have a < b, c < d, and a < c")
38 changes: 37 additions & 1 deletion corpus_stemmata/Bogdanow_1960_SuiteMerlin/metadata.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ publicationPage: "188-198"
publicationLink: "https://doi.org/10.3406/roma.1960.3217"
workTitle: "Suite du Merlin"
workViaf: "293320611"
workOrigDate: "1201-1300"
workOrigDate: "1236-1245"
workOrigPlace: ""
workAuthor: ""
workAuthorViaf: ""
Expand All @@ -23,3 +23,39 @@ rootType: "archetype"
contributor: "Jean-Baptiste Camps"
contributorORCID: "0000-0003-0385-7037"
note: ""
wits:
- witSigla: "C"
witSignature: "Cambridge, University of Cambridge. Library, MS Add. 7071"
witOrigDate: "1301-1320"
witOrigPlace: "England or Northern France (DÉAF)"
witNotes: ""
witMsDesc: ""
witDigit: ""
- witSigla: "H"
witSignature: "London, British library, Add MS 38117"
witOrigDate: "1281-1320"
witOrigPlace: "pic. -- Paris; Nord-Est de la France (Legge et Vinaver); pic. déb. 14es. (DÉAF) -- Source: Arlima, DÉAF"
witNotes: ""
witMsDesc: ""
witDigit: ""
- witSigla: "S"
witSignature: "Siena, Archivio di Stato [without shelfmark]"
witOrigDate: "1281-1300"
witOrigPlace: ""
witNotes: "fragment"
witMsDesc: ""
witDigit: ""
- witSigla: "D"
witSignature: "Demanda (ed. 1535)"
witOrigDate: ""
witOrigPlace: ""
witNotes: ""
witMsDesc: ""
witDigit: ""
- witSigla: "D"
witSignature: "Demanda (ed. 1498)"
witOrigDate: ""
witOrigPlace: ""
witNotes: ""
witMsDesc: ""
witDigit: ""
2 changes: 1 addition & 1 deletion corpus_stemmata/Braunholtz_1921_Horn/metadata.txt
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ note : "The stemma include all witnesses, except for the F2 fragment, that conta
wits:
- witSigla: "C"
witSignature: "Cambridge, University Library, Ff.6.17"
witOrigDate: ""
witOrigDate: "1201-1300"
witOrigPlace: ""
witNotes: ""
witMsDesc: ""
Expand Down
2 changes: 1 addition & 1 deletion corpus_stemmata/Cloetta_1911_Moniage1/metadata.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ publicationPage: "203"
publicationLink: ""
workTitle: "Moniage Guillaume"
workViaf: "184427604"
workOrigDate: ""
workOrigDate: "1134-1170"
workOrigPlace: ""
workAuthor: ""
workAuthorViaf: ""
Expand Down
2 changes: 1 addition & 1 deletion corpus_stemmata/Cloetta_1911_Moniage2/metadata.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ publicationPage: "241"
publicationLink: ""
workTitle: "Moniage Guillaume"
workViaf: "184427604"
workOrigDate: ""
workOrigDate: "1171-1190"
workOrigPlace: ""
workAuthor: ""
workAuthorViaf: ""
Expand Down
40 changes: 38 additions & 2 deletions corpus_stemmata/Constans_1890_Thebes/metadata.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ publicationPage: "LXVI"
publicationLink: ""
workTitle: "Roman de Thebes"
workViaf: "184279798"
workOrigDate: "1150 ca."
workOrigDate: "1146-1162"
workOrigPlace: ""
workAuthor: ""
workAuthorViaf: ""
Expand All @@ -22,4 +22,40 @@ extraStemmContam: "no"
rootType: "original"
contributor: "Jean-Baptiste Camps"
contributorORCID: "0000-0003-0385-7037"
note: "la contamination est notée sur le stemma via le signe d'addition, selon la première mode, et indiquée dans le texte. y a utilisé un modèle secondaire de la famille de x."
note: "la contamination est notée sur le stemma via le signe d'addition, selon la première mode, et indiquée dans le texte. y a utilisé un modèle secondaire de la famille de x. Vérifier les sigles dans l'édition."
wits:
- witSigla: "A"
witSignature: "Paris, Bibliothèque nationale de France, Français 375"
witOrigDate: "1288"
witOrigPlace: "Arras (Jonas)"
witNotes: ""
witMsDesc: ""
witDigit: ""
- witSigla: "B"
witSignature: "Paris, Bibliothèque nationale de France, Français 60"
witOrigDate: "1313-1349"
witOrigPlace: ""
witNotes: ""
witMsDesc: ""
witDigit: ""
- witSigla: "C"
witSignature: "Paris, Bibliothèque nationale de France, Français 784"
witOrigDate: "1241-1260"
witOrigPlace: ""
witNotes: ""
witMsDesc: ""
witDigit: ""
- witSigla: "P"
witSignature: "Cologny, Fondation Martin Bodmer, Cod. Bodmer 18"
witOrigDate: "1281-1300"
witOrigPlace: ""
witNotes: ""
witMsDesc: ""
witDigit: ""
- witSigla: "S"
witSignature: "London, British library, Add MS 34114"
witOrigDate: "1396-1405"
witOrigPlace: ""
witNotes: ""
witMsDesc: ""
witDigit: ""
Loading