Skip to content

mds

Functions for doing multidimensional scaling.

classic_mds(distance_matrix, ndim=3)

Perform classical (Torgerson) MDS. This assumes a complete euclidean distance matrix.

Parameters:

Name Type Description Default
distance_matrix NDArray[floating]

The euclidean distance matrix. Should be (npoint, npoint) and complete (no missing distances).

required
ndim int

The number of dimensions to scale to.

3

Returns:

Name Type Description
coords NDArray[floating]

The output coordinates, will be (npoint, ndim). Points are in the same order as distance_matrix.

Raises:

Type Description
ValueError

If distance_matrix is not square. If distance_matrix has non-finite values.

Source code in megham/mds.py
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
def classic_mds(
    distance_matrix: NDArray[np.floating], ndim: int = 3
) -> NDArray[np.floating]:
    """
    Perform classical (Torgerson) MDS. This assumes a complete euclidean distance matrix.

    Parameters
    ----------
    distance_matrix : NDArray[np.floating]
        The euclidean distance matrix.
        Should be (npoint, npoint) and complete (no missing distances).
    ndim : int, default: 3
        The number of dimensions to scale to.

    Returns
    -------
    coords : NDArray[np.floating]
        The output coordinates, will be (npoint, ndim).
        Points are in the same order as distance_matrix.

    Raises
    ------
    ValueError
        If distance_matrix is not square.
        If distance_matrix has non-finite values.
    """
    npoint = len(distance_matrix)
    if distance_matrix.shape != (npoint, npoint):
        raise ValueError("Distance matrix should be square")
    if not np.all(np.isfinite(distance_matrix)):
        raise ValueError("Distance matrix must only have finite values")

    d_sq = distance_matrix**2
    cent_mat = np.eye(npoint) - np.ones_like(distance_matrix) / npoint
    double_cent = -0.5 * cent_mat @ d_sq @ cent_mat

    eigen_vals, eigen_vecs = np.linalg.eig(double_cent)
    eigen_vals = eigen_vals[-1 * ndim :]
    eigen_vecs = eigen_vecs[:, -1 * ndim :]
    tol = 1e16
    eigen_vals[(eigen_vals < 0) & (eigen_vals > -tol)] = 0.0

    coords = eigen_vecs @ (np.diag(np.sqrt(eigen_vals)))
    return np.real(coords)

metric_mds(distance_matrix, ndim=3, weights=None, guess=None, use_smacof=True, **kwargs)

Perform metric MDS. This is useful over classical MDS if you have missing values or need to weight distances.

Parameters:

Name Type Description Default
distance_matrix NDArray[floating]

The distance matrix. Should be (npoint, npoint), unknown distances should be set to nan.

required
ndim int

The number of dimensions to scale to.

3
weights Optional[NDArray[floating]]

How much to weigh each distance in distance_matrix in the metric. Weights should be finite and non-negative, invalid weights will be set to 0. Should be (npoint, npoint) and have 1-to-1 correspondance with distance_matrix.

None
guess Optional[NDArray[floating]]

Initial guess at coordinates. Should be (npoint, ndim) and in the same order as distance_matrix.

None
use_smacof bool

If True use smacof for the optimization. If False use scipy.optimize.minimize.

True
**kwargs

Keyword arguments to pass to smacof or scipy.optimize.minimize.

{}

Returns:

Name Type Description
coords NDArray[floating]

The output coordinates, will be (npoint, ndim). Points are in the same order as distance_matrix.

Raises:

Type Description
ValueError

If distance_matrix is not square. If the shape of weights or guess is not consistant with distance_matrix.

Source code in megham/mds.py
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
def metric_mds(
    distance_matrix: NDArray[np.floating],
    ndim: int = 3,
    weights: Optional[NDArray[np.floating]] = None,
    guess: Optional[NDArray[np.floating]] = None,
    use_smacof: bool = True,
    **kwargs,
) -> NDArray[np.floating]:
    """
    Perform metric MDS.
    This is useful over classical MDS if you have missing values or need to weight distances.

    Parameters
    ----------
    distance_matrix : NDArray[np.floating]
        The distance matrix.
        Should be (npoint, npoint), unknown distances should be set to nan.
    ndim : int, default: 3
        The number of dimensions to scale to.
    weights : Optional[NDArray[np.floating]], default: None
        How much to weigh each distance in distance_matrix in the metric.
        Weights should be finite and non-negative, invalid weights will be set to 0.
        Should be (npoint, npoint) and have 1-to-1 correspondance with distance_matrix.
    guess : Optional[NDArray[np.floating]], default: None
        Initial guess at coordinates.
        Should be (npoint, ndim) and in the same order as distance_matrix.
    use_smacof : bool, default: True
        If True use smacof for the optimization.
        If False use scipy.optimize.minimize.
    **kwargs
        Keyword arguments to pass to smacof or scipy.optimize.minimize.

    Returns
    -------
    coords : NDArray[np.floating]
        The output coordinates, will be (npoint, ndim).
        Points are in the same order as distance_matrix.

    Raises
    ------
    ValueError
        If distance_matrix is not square.
        If the shape of weights or guess is not consistant with distance_matrix.
    """
    npoint = len(distance_matrix)
    if distance_matrix.shape != (npoint, npoint):
        raise ValueError("Distance matrix should be square")

    if weights is None:
        weights = np.ones_like(distance_matrix)
    elif weights.shape != (npoint, npoint):
        raise ValueError("Weights must match distance_matrix")
    else:
        neg_msk = weights < 0
        if np.any(neg_msk):
            logger.warn("Negetive weight found, setting to 0.")
            weights[neg_msk] = 0
        nfin_msk = ~np.isfinite(weights)
        if np.any(nfin_msk):
            logger.warn("Non-finite weight found, setting to 0.")
            weights[nfin_msk] = 0

    if guess is None:
        logger.info("No initial guess provided, using a random set of points.")
        guess = _init_coords(npoint, ndim, distance_matrix)
    elif guess.shape != (npoint, ndim):
        raise ValueError("Guess must be (npoint, ndim)")

    if use_smacof:
        coords, _ = smacof(
            metric_stress, guess, distance_matrix, weights, ndim, **kwargs
        )
    else:
        res = opt.minimize(
            metric_stress,
            guess.ravel(),
            args=(distance_matrix.astype(float), weights.astype(float), ndim),
            **kwargs,
        )
        logger.info(
            "Finished with a final stress of %f\n Optimizer message: %s",
            res.fun,
            res.message,
        )
        coords = res.x.reshape((npoint, ndim))
    return coords

metric_stress(coords, distance_matrix, weights, ndim)

Stress that is minimized for metric MDS.

Parameters:

Name Type Description Default
coords NDArray[floating]

The coordinates to calculate stress at. Should be (npoint, ndim)

required
distance_matrix NDArray[floating]

The distance matrix. Should be (npoint, npoint), unknown distances should be set to nan.

required
weights NDArray[floating]

How much to weigh each distance in distance_matrix in the metric. Should be (npoint, npoint) and have 1-to-1 correspondance with distance_matrix.

required
ndim int

The number of dimensions to scale to.

3

Returns:

Name Type Description
stress float

The stress of the system at the with the given coordinates.

Source code in megham/mds.py
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
def metric_stress(
    coords: NDArray[np.floating],
    distance_matrix: NDArray[np.floating],
    weights: NDArray[np.floating],
    ndim: int,
) -> float:
    """
    Stress that is minimized for metric MDS.

    Parameters
    ----------
    coords : NDArray[np.floating]
        The coordinates to calculate stress at.
        Should be (npoint, ndim)
    distance_matrix : NDArray[np.floating]
        The distance matrix.
        Should be (npoint, npoint), unknown distances should be set to nan.
    weights : NDArray[np.floating]
        How much to weigh each distance in distance_matrix in the metric.
        Should be (npoint, npoint) and have 1-to-1 correspondance with distance_matrix.
    ndim : int, default: 3
        The number of dimensions to scale to.

    Returns
    -------
    stress : float
        The stress of the system at the with the given coordinates.
    """
    npoint = len(distance_matrix)
    idx = np.triu_indices(npoint, 1)
    edm = make_edm(coords.reshape((npoint, ndim)))
    stress = np.sqrt(np.nansum(weights[idx] * (distance_matrix[idx] - edm[idx]) ** 2))
    return stress

nonmetric_mds(distance_matrix, ndim=3, weights=None, guess=None, epsilon_outer=1e-10, max_iters_outer=200, use_smacof=False, **kwargs)

Perform nonmetric MDS. This is useful over metric MDS if you have some wierd dissimilarity (ie. not euclidean).

Parameters:

Name Type Description Default
distance_matrix NDArray[floating]

The distance matrix. Should be (npoint, npoint), unknown distances should be set to nan.

required
ndim int

The number of dimensions to scale to.

3
weights Optional[NDArray[floating]]

How much to weigh each distance in distance_matrix in the metric. Weights should be finite and non-negative, invalid weights will be set to 0. Should be (npoint, npoint) and have 1-to-1 correspondance with distance_matrix.

None
guess Optional[NDArray[floating]]

Initial guess at coordinates. Should be (npoint, ndim) and in the same order as distance_matrix.

None
epsilon_outer float

The difference in stress between iterations before stopping outer optimization.

1e-10
max_iters_outer int

Maximum number of iterations for outer optimization.

200
use_smacof bool

If True use smacof for the optimization. If False use scipy.optimize.minimize.

False
**kwargs

Keyword arguments to pass to smacof or scipy.optimize.minimize.

{}

Returns:

Name Type Description
coords NDArray[floating]

The output coordinates, will be (npoint, ndim). Points are in the same order as distance_matrix.

Raises:

Type Description
ValueError

If distance_matrix is not square. If the shape of weights or guess is not consistant with distance_matrix.

Source code in megham/mds.py
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
def nonmetric_mds(
    distance_matrix: NDArray[np.floating],
    ndim: int = 3,
    weights: Optional[NDArray[np.floating]] = None,
    guess: Optional[NDArray[np.floating]] = None,
    epsilon_outer: float = 1e-10,
    max_iters_outer: int = 200,
    use_smacof: bool = False,
    **kwargs,
) -> NDArray[np.floating]:
    """
    Perform nonmetric MDS.
    This is useful over metric MDS if you have some wierd dissimilarity
    (ie. not euclidean).

    Parameters
    ----------
    distance_matrix : NDArray[np.floating]
        The distance matrix.
        Should be (npoint, npoint), unknown distances should be set to nan.
    ndim : int, default: 3
        The number of dimensions to scale to.
    weights : Optional[NDArray[np.floating]], default: None
        How much to weigh each distance in distance_matrix in the metric.
        Weights should be finite and non-negative, invalid weights will be set to 0.
        Should be (npoint, npoint) and have 1-to-1 correspondance with distance_matrix.
    guess : Optional[NDArray[np.floating]], default: None
        Initial guess at coordinates.
        Should be (npoint, ndim) and in the same order as distance_matrix.
    epsilon_outer : float, default: 1e-10
        The difference in stress between iterations before stopping outer optimization.
    max_iters_outer : int, default: 200
        Maximum number of iterations for outer optimization.
    use_smacof : bool, default: False
        If True use smacof for the optimization.
        If False use scipy.optimize.minimize.
    **kwargs
        Keyword arguments to pass to smacof or scipy.optimize.minimize.

    Returns
    -------
    coords : NDArray[np.floating]
        The output coordinates, will be (npoint, ndim).
        Points are in the same order as distance_matrix.

    Raises
    ------
    ValueError
        If distance_matrix is not square.
        If the shape of weights or guess is not consistant with distance_matrix.
    """
    if use_smacof:
        logger.warn(
            "You are using SMACOF with nonmetric mds, this doesn't currently work very well..."
        )
        if "verbose" not in kwargs:
            kwargs["verbose"] = False
    npoint = len(distance_matrix)
    if distance_matrix.shape != (npoint, npoint):
        raise ValueError("Distance matrix should be square")

    if weights is None:
        weights = np.ones_like(distance_matrix)
    elif weights.shape != (npoint, npoint):
        raise ValueError("Weights must match distance_matrix")
    else:
        neg_msk = weights < 0
        if np.any(neg_msk):
            logger.warn("Negetive weight found, setting to 0.")
            weights[neg_msk] = 0
        nfin_msk = not np.isfinite(weights)
        if np.any(nfin_msk):
            logger.warn("Non-finite weight found, setting to 0.")
            weights[nfin_msk] = 0

    if guess is None:
        logger.info("No initial guess provided, using a random set of points.")
        guess = _init_coords(npoint, ndim, distance_matrix)
    elif guess.shape != (npoint, ndim):
        raise ValueError("Guess must be (npoint, ndim)")

    idx = np.triu_indices(npoint, 1)
    flat_dist = np.ravel(distance_matrix[idx])
    flat_weight = np.ravel(weights[idx])
    msk = np.isfinite(flat_dist) + np.isfinite(flat_weight)
    flat_dist = flat_dist[msk]
    flat_weight = flat_weight[msk]
    f_dist = np.zeros_like(distance_matrix)

    stress = np.inf
    i = 0
    ir = IsotonicRegression()
    coords = guess.copy()
    for i in range(max_iters_outer):
        _stress = stress
        # Make a guess at f
        edm = make_edm(guess)[idx][msk]
        _f_dist_flat = ir.fit_transform(flat_dist, edm, sample_weight=flat_weight)
        f_dist_flat = np.nan + np.empty(msk.shape)
        f_dist_flat[msk] = _f_dist_flat
        f_dist[idx] = f_dist_flat

        # Solve for the coordinates at this f
        if use_smacof:
            guess, stress = smacof(
                nonmetric_stress, coords, f_dist, weights, ndim, **kwargs
            )
        else:
            res = opt.minimize(
                nonmetric_stress,
                coords.ravel(),
                args=(f_dist.astype(float), weights.astype(float), ndim),
                **kwargs,
            )
            guess = res.x.reshape((npoint, ndim))
            stress = res.fun
        if guess is None:
            raise RuntimeError("Current guess is None, something went wrong...")
        coords = guess
        if _stress - stress < epsilon_outer:
            break
        if stress == 0:
            break
    logger.info("Took %d iterations with a final stress of %f", i + 1, stress)

    return coords

nonmetric_stress(coords, f_dist, weights, ndim)

Stress that is minimized for nonmetric MDS.

Parameters:

Name Type Description Default
coords NDArray[floating]

The coordinates to calculate stress at. Should be (npoint, ndim)

required
f_dist NDArray[floating]

The distance matrix with the isotonic regression applied. Should be (npoint, npoint), unknown distances should be set to nan.

required
weights NDArray[floating]

How much to weigh each distance in distance_matrix in the metric. Should be (npoint, npoint) and have 1-to-1 correspondance with distance_matrix.

required
ndim int

The number of dimensions to scale to.

3

Returns:

Name Type Description
stress float

The stress of the system at the with the given coordinates.

Source code in megham/mds.py
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
def nonmetric_stress(
    coords: NDArray[np.floating],
    f_dist: NDArray[np.floating],
    weights: NDArray[np.floating],
    ndim: int,
) -> float:
    """
    Stress that is minimized for nonmetric MDS.

    Parameters
    ----------
    coords : NDArray[np.floating]
        The coordinates to calculate stress at.
        Should be (npoint, ndim)
    f_dist : NDArray[np.floating]
        The distance matrix with the isotonic regression applied.
        Should be (npoint, npoint), unknown distances should be set to nan.
    weights : NDArray[np.floating]
        How much to weigh each distance in distance_matrix in the metric.
        Should be (npoint, npoint) and have 1-to-1 correspondance with distance_matrix.
    ndim : int, default: 3
        The number of dimensions to scale to.

    Returns
    -------
    stress : float
        The stress of the system at the with the given coordinates.
    """
    npoint = len(f_dist)
    idx = np.triu_indices(npoint, 1)
    edm = make_edm(coords.reshape((npoint, ndim)))

    num = np.nansum(weights[idx] * (f_dist[idx] - edm[idx]) ** 2)
    denom = np.nansum(edm[idx] ** 2)
    stress = np.sqrt(num / denom)

    return stress

smacof(stress_func, coords, distance_matrix, weights, ndim, max_iters=10000, epsilon=1e-10, verbose=True)

SMACOF algorithm for multidimensional scaling.

Parameters:

Name Type Description Default
stress_func Callable[[NDArray[floating], NDArray[floating], NDArray[floating], int], float]

The stress function to use.

required
coords NDArray[floating]

Initial guess of coordinates to calculate stress at. Should be (npoint, ndim)

required
distance_matrix NDArray[floating]

The distance matrix. Should be (npoint, npoint), unknown distances should be set to nan.

required
weights NDArray[floating]

How much to weigh each distance in distance_matrix in the metric. Should be (npoint, npoint) and have 1-to-1 correspondance with distance_matrix.

required
ndim int

The number of dimensions to scale to.

3
epsilon float

The difference in stress between iterations before stopping.

1e-10
max_iters int

The maximum iterations to run for.

10000
verbose bool

Sets the verbosity of this function. If True logs at the INFO level. If False logs at the DEBUG level.

True

Returns:

Name Type Description
coords NDArray[floating]

The coordinates as optimized by SMACOF.

stress float

The stress of the system at the final iteration.

Source code in megham/mds.py
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
def smacof(
    stress_func: Callable[
        [NDArray[np.floating], NDArray[np.floating], NDArray[np.floating], int], float
    ],
    coords: NDArray[np.floating],
    distance_matrix: NDArray[np.floating],
    weights: NDArray[np.floating],
    ndim: int,
    max_iters: int = 10000,
    epsilon: float = 1e-10,
    verbose: bool = True,
) -> tuple[NDArray[np.floating], float]:
    """
    SMACOF algorithm for multidimensional scaling.

    Parameters
    ----------
    stress_func : Callable[[NDArray[np.floating], NDArray[np.floating], NDArray[np.floating], int], float]
        The stress function to use.
    coords : NDArray[np.floating]
        Initial guess of coordinates to calculate stress at.
        Should be (npoint, ndim)
    distance_matrix : NDArray[np.floating]
        The distance matrix.
        Should be (npoint, npoint), unknown distances should be set to nan.
    weights : NDArray[np.floating]
        How much to weigh each distance in distance_matrix in the metric.
        Should be (npoint, npoint) and have 1-to-1 correspondance with distance_matrix.
    ndim : int, default: 3
        The number of dimensions to scale to.
    epsilon : float, default: 1e-10
        The difference in stress between iterations before stopping.
    max_iters : int, default: 10000
        The maximum iterations to run for.
    verbose : bool, default: True
        Sets the verbosity of this function.
        If True logs at the INFO level.
        If False logs at the DEBUG level.

    Returns
    -------
    coords : NDArray[np.floating]
        The coordinates as optimized by SMACOF.
    stress : float
        The stress of the system at the final iteration.
    """
    if verbose:
        log = logger.info
    else:
        log = logger.debug
    i = 0
    npoint = len(distance_matrix)
    stress = stress_func(coords, distance_matrix, weights, ndim)
    for i in range(max_iters):
        if stress == 0:
            break
        _stress = stress

        edm = make_edm(coords)

        B = -1 * weights * distance_matrix / edm
        B[edm == 0] = 0
        B[~np.isfinite(B)] = 0
        B[np.diag_indices_from(B)] -= np.sum(B, axis=1)

        guess = np.dot(B, coords) / npoint

        stress = stress_func(guess, distance_matrix, weights, ndim)
        coords = guess
        if _stress - stress < epsilon:
            break
    log("SMACOF took %d iterations with a final stress of %f", i + 1, stress)
    return coords, stress