Trees#

Loading a tree from a file and visualizing it with ascii_art()#

from cogent3 import load_tree

tr = load_tree("data/test.tree")
print(tr.ascii_art())
                              /-Human
                    /edge.0--|
          /edge.1--|          \-HowlerMon
         |         |
         |          \-Mouse
-root----|
         |--NineBande
         |
          \-DogFaced

Writing a tree to a file#

from cogent3 import load_tree

tr = load_tree("data/test.tree")
tr.write("data/temp.tree")

Getting the individual nodes of a tree by name#

from cogent3 import load_tree

tr = load_tree("data/test.tree")
names = tr.get_node_names()
names[:4]
['root', 'edge.1', 'edge.0', 'Human']
names[4:]
names_nodes = tr.get_nodes_dict()
names_nodes["Human"]
Tree("Human;")
tr.get_node_matching_name("Mouse")
Tree("Mouse;")

Getting the name of a node (or a tree)#

from cogent3 import load_tree

tr = load_tree("data/test.tree")
hu = tr.get_node_matching_name("Human")
tr.name
'root'
hu.name
'Human'

The object type of a tree and its nodes is the same#

from cogent3 import load_tree

tr = load_tree("data/test.tree")
nodes = tr.get_nodes_dict()
hu = nodes["Human"]
type(hu)
cogent3.core.tree.PhyloNode
type(tr)
cogent3.core.tree.PhyloNode

Working with the nodes of a tree#

Get all the nodes, tips and edges

from cogent3 import load_tree

tr = load_tree("data/test.tree")
nodes = tr.get_nodes_dict()
for n in nodes.items():
    print(n)
('root', Tree("(((Human,HowlerMon),Mouse),NineBande,DogFaced);"))
('edge.1', Tree("((Human,HowlerMon),Mouse);"))
('edge.0', Tree("(Human,HowlerMon);"))
('Human', Tree("Human;"))
('HowlerMon', Tree("HowlerMon;"))
('Mouse', Tree("Mouse;"))
('NineBande', Tree("NineBande;"))
('DogFaced', Tree("DogFaced;"))

only the terminal nodes (tips)

for n in tr.iter_tips():
    print(n)
Human:0.0311054096183;
HowlerMon:0.0415847131449;
Mouse:0.277353608988;
NineBande:0.0939768158209;
DogFaced:0.113211053859;

for internal nodes (edges) we can use Newick format to simplify the output

from cogent3 import load_tree

tr = load_tree("data/test.tree")
for n in tr.iter_nontips():
    print(n.get_newick())
((Human,HowlerMon),Mouse);
(Human,HowlerMon);

Getting the path between two tips or edges (connecting edges)#

from cogent3 import load_tree

tr = load_tree("data/test.tree")
edges = tr.get_connecting_edges("edge.1", "Human")
for edge in edges:
    print(edge.name)
edge.1
edge.0
Human

Getting the distance between two nodes#

from cogent3 import load_tree

tr = load_tree("data/test.tree")
nodes = tr.get_nodes_dict()
hu = nodes["Human"]
mu = nodes["Mouse"]
hu.distance(mu)
hu.is_tip()
True

Getting the last common ancestor (LCA) for two nodes#

from cogent3 import load_tree

tr = load_tree("data/test.tree")
nodes = tr.get_nodes_dict()
hu = nodes["Human"]
mu = nodes["Mouse"]
lca = hu.last_common_ancestor(mu)
lca
Tree("((Human,HowlerMon),Mouse);")
type(lca)
cogent3.core.tree.PhyloNode

Getting all the ancestors for a node#

from cogent3 import load_tree

tr = load_tree("data/test.tree")
hu = tr.get_node_matching_name("Human")
for a in hu.ancestors():
    print(a.name)
edge.0
edge.1
root

Getting all the children for a node#

from cogent3 import load_tree

tr = load_tree("data/test.tree")
node = tr.get_node_matching_name("edge.1")
children = list(node.iter_tips()) + list(node.iter_nontips())
for child in children:
    print(child.name)
Human
HowlerMon
Mouse
edge.0

Getting all the distances for a tree#

from cogent3 import load_tree

tr = load_tree("data/test.tree")
dists = tr.get_distances()

We also show how to select a subset of distances involving just one species.

human_dists = [names for names in dists if "Human" in names]
for dist in human_dists:
    print(dist, dists[dist])
('Human', 'HowlerMon') 0.0726901227632
('HowlerMon', 'Human') 0.0726901227632
('Human', 'Mouse') 0.3467553610937
('Mouse', 'Human') 0.3467553610937
('Human', 'NineBande') 0.18310641816450002
('NineBande', 'Human') 0.18310641816450002
('Human', 'DogFaced') 0.2023406562026
('DogFaced', 'Human') 0.2023406562026

Getting the two nodes that are farthest apart#

from cogent3 import load_tree

tr = load_tree("data/test.tree")
tr.max_tip_tip_distance()
(np.float64(0.4102925130849), ('Mouse', 'DogFaced'))

Get the nodes within a given distance#

from cogent3 import load_tree

tr = load_tree("data/test.tree")
hu = tr.get_node_matching_name("Human")
tips = hu.tips_within_distance(0.2)
for t in tips:
    print(t)
HowlerMon:0.0415847131449;
NineBande:0.0939768158209;

Rerooting trees#

At a named node#

from cogent3 import load_tree

tr = load_tree("data/test.tree")
print(tr.rooted_at("edge.0").ascii_art())
          /-Human
         |
-root----|--HowlerMon
         |
         |          /-Mouse
          \edge.0--|
                   |          /-NineBande
                    \edge.1--|
                              \-DogFaced

At the midpoint#

from cogent3 import load_tree

tr = load_tree("data/test.tree")
print(tr.root_at_midpoint().ascii_art())
          /-Mouse
         |
-root----|                    /-Human
         |          /edge.0--|
         |         |          \-HowlerMon
          \edge.0.2|
                   |          /-NineBande
                    \edge.1--|
                              \-DogFaced
print(tr.ascii_art())
                              /-Human
                    /edge.0--|
          /edge.1--|          \-HowlerMon
         |         |
         |          \-------- /-Mouse
-root----|
         |--NineBande
         |
          \-DogFaced

Near a given tip#

from cogent3 import load_tree

tr = load_tree("data/test.tree")
print(tr.ascii_art())
                              /-Human
                    /edge.0--|
          /edge.1--|          \-HowlerMon
         |         |
         |          \-Mouse
-root----|
         |--NineBande
         |
          \-DogFaced
print(tr.rooted_with_tip("Mouse").ascii_art())
                    /-Human
          /edge.0--|
         |          \-HowlerMon
         |
-root----|--Mouse
         |
         |          /-NineBande
          \edge.1--|
                    \-DogFaced

Tree representations#

Newick format#

from cogent3 import load_tree

tr = load_tree("data/test.tree")
tr.get_newick()
'(((Human,HowlerMon),Mouse),NineBande,DogFaced);'
tr.get_newick(with_distances=True)
'(((Human:0.0311054096183,HowlerMon:0.0415847131449):0.0382963424874,Mouse:0.277353608988):0.0197278502379,NineBande:0.0939768158209,DogFaced:0.113211053859);'

XML format#

from cogent3 import load_tree

tr = load_tree("data/test.tree")
xml = tr.get_xml()
for line in xml.splitlines():
    print(line)
<?xml version="1.0"?>
<clade>
  <clade>
     <param><name>length</name><value>0.0197278502379</value></param>
    <clade>
       <param><name>length</name><value>0.0382963424874</value></param>
      <clade>
         <name>Human</name>
         <param><name>length</name><value>0.0311054096183</value></param>
      </clade>
      <clade>
         <name>HowlerMon</name>
         <param><name>length</name><value>0.0415847131449</value></param>
      </clade>
    </clade>
    <clade>
       <name>Mouse</name>
       <param><name>length</name><value>0.277353608988</value></param>
    </clade>
  </clade>
  <clade>
     <name>NineBande</name>
     <param><name>length</name><value>0.0939768158209</value></param>
  </clade>
  <clade>
     <name>DogFaced</name>
     <param><name>length</name><value>0.113211053859</value></param>
  </clade>
</clade>

Tree traversal#

Here is the example tree for reference:

from cogent3 import load_tree

tr = load_tree("data/test.tree")
print(tr.ascii_art())
                              /-Human
                    /edge.0--|
          /edge.1--|          \-HowlerMon
         |         |
         |          \-Mouse
-root----|
         |--NineBande
         |
          \-DogFaced

Preorder#

from cogent3 import load_tree

tr = load_tree("data/test.tree")
for t in tr.preorder():
    print(t.get_newick())
(((Human,HowlerMon),Mouse),NineBande,DogFaced);
((Human,HowlerMon),Mouse);
(Human,HowlerMon);
Human;
HowlerMon;
Mouse;
NineBande;
DogFaced;

Postorder#

from cogent3 import load_tree

tr = load_tree("data/test.tree")
for t in tr.postorder():
    print(t.get_newick())
Human;
HowlerMon;
(Human,HowlerMon);
Mouse;
((Human,HowlerMon),Mouse);
NineBande;
DogFaced;
(((Human,HowlerMon),Mouse),NineBande,DogFaced);

Selecting subtrees#

One way to do it#

from cogent3 import load_tree

tr = load_tree("data/test.tree")
for tip in tr.iter_nontips():
    tip_names = tip.get_tip_names()
    print(tip_names)
    sub_tree = tr.get_sub_tree(tip_names)
    print(sub_tree.ascii_art())
['Human', 'HowlerMon', 'Mouse']
          /-Human
         |
-root----|--HowlerMon
         |
          \-Mouse
['Human', 'HowlerMon']
          /-Human
-root----|
          \-HowlerMon

Tree manipulation methods#

Pruning the tree#

Remove internal nodes with only one child. Create new connections and branch lengths (if tree is a PhyloNode) to reflect the change.

from cogent3 import make_tree

simple_tree_string = "(B:0.2,(D:0.4)E:0.5)F;"
simple_tree = make_tree(simple_tree_string)
print(simple_tree.ascii_art())
          /-B
-F-------|
          \E------- /-D
simple_tree.prune()
print(simple_tree.ascii_art())
          /-B
-F-------|
          \-D
print(simple_tree)
(B:0.2,D:0.9)F;

Create a full unrooted copy of the tree#

from cogent3 import load_tree

tr1 = load_tree("data/test.tree")
print(tr1.get_newick())
(((Human,HowlerMon),Mouse),NineBande,DogFaced);
tr2 = tr1.unrooted_deepcopy()
print(tr2.get_newick())
(((Human,HowlerMon),Mouse),NineBande,DogFaced);

Transform tree into a bifurcating tree#

Add internal nodes so that every node has 2 or fewer children.

from cogent3 import make_tree

tree_string = "(B:0.2,H:0.2,(C:0.3,D:0.4,E:0.1)F:0.5)G;"
tr = make_tree(tree_string)
print(tr.ascii_art())
          /-B
         |
         |--H
-G-------|
         |          /-C
         |         |
          \F-------|--D
                   |
                    \-E
print(tr.bifurcating().ascii_art())
          /-B
-G-------|
         |          /-H
          \--------|
                   |          /-C
                    \F-------|
                             |          /-D
                              \--------|
                                        \-E

Transform tree into a balanced tree#

Using a balanced tree can substantially improve performance of likelihood calculations. Note that the resulting tree has a different orientation with the effect that specifying clades or stems for model parameterisation should be done using the “outgroup_name” argument.

from cogent3 import load_tree

tr = load_tree("data/test.tree")
print(tr.ascii_art())
                              /-Human
                    /edge.0--|
          /edge.1--|          \-HowlerMon
         |         |
         |          \-Mouse
-root----|
         |--NineBande
         |
          \-DogFaced
print(tr.balanced().ascii_art())
                    /-Human
          /edge.0--|
         |          \-HowlerMon
         |
-root----|--Mouse
         |
         |          /-NineBande
          \edge.1--|
                    \-DogFaced

Test two trees for same topology#

Branch lengths don’t matter.

from cogent3 import make_tree

tr1 = make_tree("(B:0.2,(C:0.2,D:0.2)F:0.2)G;")
tr2 = make_tree("((C:0.1,D:0.1)F:0.1,B:0.1)G;")
tr1.same_topology(tr2)
True

Measure topological distances between two trees#

A number of topological tree distance metrics are available. They include:

  • The Robinson-Foulds Distance for rooted trees.

  • The Matching Cluster Distance for rooted trees.

  • The Robinson-Foulds Distance for unrooted trees.

  • The Lin-Rajan-Moret Distance for unrooted trees.

There are several variations of the Robinson-Foulds metric in the literature. The definition used by cogent3 is the cardinality of the symmetric difference of the sets of clades/splits in the two rooted/unrooted trees. Other definitions sometimes divide this by two, or normalise it to the unit interval.

The Robinson-Foulds distance is quick to compute, but is known to saturate quickly. Moving a single leaf in a tree can maximise this metric.

The Matching Cluster and Lin-Rajan-Moret are two matching-based distances that are more statistically robust. Unlike the Robinson-Foulds distance which counts how many of the splits/clades are not exactly same, the matching-based distances measures the degree by which the splits/clades are different. The matching-based distances solve a min-weight matching problem, which for large trees may take longer to compute.

# Distance metrics for rooted trees
from cogent3 import make_tree

tr1 = make_tree(treestring="(a,(b,(c,(d,e))));")
tr2 = make_tree(treestring="(e,(d,(c,(b,a))));")

mc_distance = tr1.tree_distance(tr2, method="matching_cluster") # or method="mc" or method="matching"
rooted_rf_distance = tr1.tree_distance(tr2, method="rooted_robinson_foulds") # or method="rrf" or method="rf"

print("Matching Cluster Distance:", mc_distance)
print("Rooted Robinson Foulds Distance:", rooted_rf_distance)
Matching Cluster Distance: 10
Rooted Robinson Foulds Distance: 6
# Distance metrics for unrooted trees
from cogent3 import make_tree

tr1 = make_tree(treestring="(a,b,(c,(d,e)));")
tr2 = make_tree(treestring="((a,c),(b,d),e);")

lrm_distance = tr1.tree_distance(tr2, method="lin_rajan_moret") # or method="lrm" or method="matching"
unrooted_rf_distance = tr1.tree_distance(tr2, method="unrooted_robinson_foulds") # or method="urf" or method="rf"

print("Lin-Rajan-Moret Distance:", lrm_distance)
print("Unrooted Robinson Foulds Distance:", unrooted_rf_distance)
Lin-Rajan-Moret Distance: 3
Unrooted Robinson Foulds Distance: 4

Calculate each node’s maximum distance to a tip#

Sets each node’s “TipDistance” attribute to be the distance from that node to its most distant tip.

from cogent3 import make_tree

tr = make_tree("(B:0.2,(C:0.3,D:0.4)F:0.5)G;")
print(tr.ascii_art())
          /-B
-G-------|
         |          /-C
          \F-------|
                    \-D
tr.set_tip_distances()
for t in tr.preorder():
    print(t.name, t.TipDistance)
G 0.9
B 0
F 0.4
C 0
D 0

Scale branch lengths in place to integers for ascii output#

from cogent3 import make_tree

tr = make_tree("(B:0.2,(C:0.3,D:0.4)F:0.5)G;")
print(tr)
(B:0.2,(C:0.3,D:0.4)F:0.5)G;
tr.scale_branch_lengths()
print(tr)
(B:22,(C:33,D:44)F:56)G;

Get tip-to-tip distances#

Get a distance matrix between all pairs of tips and a list of the tip nodes.

from cogent3 import make_tree

tr = make_tree("(B:3,(C:2,D:4)F:5)G;")
d, tips = tr.tip_to_tip_distances()
for i, t in enumerate(tips):
    print(t.name, d[i])
B [ 0. 10. 12.]
C [10.  0.  6.]
D [12.  6.  0.]

Compare two trees using tip-to-tip distance matrices#

Score ranges from 0 (minimum distance) to 1 (maximum distance). The default is to use Pearson’s correlation, in which case a score of 0 means that the Pearson’s correlation was perfectly good (1), and a score of 1 means that the Pearson’s correlation was perfectly bad (-1).

Note: automatically strips out the names that don’t match.

from cogent3 import make_tree

tr1 = make_tree("(B:2,(C:3,D:4)F:5)G;")
tr2 = make_tree("(C:2,(B:3,D:4)F:5)G;")
tr1.compare_by_tip_distances(tr2)
np.float64(0.08352668213457076)