
# coding: utf-8

# #### In this example, we cluster our alanine dipeptide trajectory using the [RMSD distance metric](http://en.wikipedia.org/wiki/Root-mean-square_deviation_of_atomic_positions) and [Ward's method](http://en.wikipedia.org/wiki/Ward's_method).

# In[ ]:

# Notebook-only setup: runs the IPython line magic `%pylab inline`, which
# enables inline matplotlib output and brings the pylab names used further
# down (e.g. `figure`, `title`) into the namespace.
# `get_ipython` is only defined inside an IPython session, so this line
# fails under a plain Python interpreter.
get_ipython().magic(u'pylab inline')
import mdtraj as md
import numpy as np
import scipy.cluster.hierarchy


# Let's load up our trajectory. This is the trajectory that we generated in the "Running a simulation in OpenMM and analyzing the results with mdtraj" example. No separate RMSD cache has to be built first: the RMSD computation below works directly on the loaded trajectory.

# In[ ]:

# Load the alanine dipeptide trajectory from 'ala2.h5'; the path is relative
# to the current working directory.
traj = md.load('ala2.h5')


# In[ ]:

# Let's compute all pairwise RMSDs between conformations.
#
# Row i of `distances` holds the RMSD of every frame of `traj` to frame i
# (md.rmsd is called with the target trajectory, the reference trajectory
# and the index of the reference frame), giving an n_frames x n_frames
# matrix.

distances = np.empty((traj.n_frames, traj.n_frames))
for i in range(traj.n_frames):
    distances[i] = md.rmsd(traj, traj, i)
# A single-argument print(...) produces the same output under Python 2 and
# Python 3, whereas the bare print statement is a SyntaxError on Python 3.
print('Max pairwise rmsd: %f nm' % np.max(distances))


# In[ ]:

# scipy.cluster implements the ward linkage
# algorithm (among others).
#
# scipy's linkage routines interpret a 2-D array as a set of observation
# vectors, not as a distance matrix, so the square RMSD matrix must first be
# converted to condensed (upper-triangle vector) form.
import scipy.spatial.distance

# checks=False skips squareform's symmetry / zero-diagonal validation, which
# floating-point round-off in the computed RMSDs could otherwise trip.
condensed_distances = scipy.spatial.distance.squareform(distances, checks=False)
linkage = scipy.cluster.hierarchy.ward(condensed_distances)


# In[ ]:

# Let's plot the resulting dendrogram.
#
# `figure` and `title` are provided by the pylab namespace set up at the top
# of the notebook.

figure()
title('RMSD Ward hierarchical clustering')
# count_sort='descending' plots the child cluster containing more original
# observations first. The previous spelling, 'descendent', is not a value
# scipy recognises, so no count-based ordering was applied.
graph = scipy.cluster.hierarchy.dendrogram(linkage, no_labels=True, count_sort='descending')

