Source code for fssa.fssa

#!/usr/bin/env python
# encoding: utf-8

r"""
Low-level routines for finite-size scaling analysis

See Also
--------

fssa : The high-level module

Notes
-----

The **fssa** package provides routines to perform finite-size scaling analyses
on experimental data [10]_ [11]_.

It has been inspired by Oliver Melchert and his superb **autoScale** package
[3]_.

References
----------

.. [10] M. E. J. Newman and G. T. Barkema, Monte Carlo Methods in Statistical
   Physics (Oxford University Press, 1999)

.. [11] K. Binder and D. W. Heermann, `Monte Carlo Simulation in Statistical
   Physics <http://dx.doi.org/10.1007/978-3-642-03163-2>`_ (Springer, Berlin,
   Heidelberg, 2010)

.. [3] O. Melchert, `arXiv:0910.5403 <http://arxiv.org/abs/0910.5403>`_
   (2009)

"""

# Python 2/3 compatibility
from __future__ import (absolute_import, division, print_function,
                        unicode_literals)

import warnings
from builtins import *
from collections import namedtuple

import numpy as np
import numpy.ma as ma
import scipy.optimize

from .optimize import _minimize_neldermead


class ScaledData(namedtuple('ScaledData', ['x', 'y', 'dy'])):
    """
    A :py:func:`namedtuple <collections.namedtuple>` for :py:func:`scaledata`
    output
    """

    # set this to keep memory requirements low, according to
    # http://docs.python.org/3/library/collections.html#namedtuple-factory-function-for-tuples-with-named-fields
    __slots__ = ()

def scaledata(l, rho, a, da, rho_c, nu, zeta):
    r'''
    Scale experimental data according to critical exponents

    Parameters
    ----------
    l, rho : 1-D array_like
        finite system sizes `l` and parameter values `rho`

    a, da : 2-D array_like of shape (`l`.size, `rho`.size)
        experimental data `a` with standard errors `da` obtained at finite
        system sizes `l` and parameter values `rho`, with
        ``a.shape == da.shape == (l.size, rho.size)``

    rho_c : float in range [rho.min(), rho.max()]
        (assumed) critical parameter value with ``rho_c >= rho.min() and
        rho_c <= rho.max()``

    nu, zeta : float
        (assumed) critical exponents

    Returns
    -------
    :py:class:`ScaledData`
        scaled data `x`, `y` with standard errors `dy`

    x, y, dy : ndarray
        two-dimensional arrays of shape ``(l.size, rho.size)``

    Notes
    -----
    Scale data points :math:`(\varrho_j, a_{ij}, da_{ij})` observed at finite
    system sizes :math:`L_i` and parameter values :math:`\varrho_j` according
    to the finite-size scaling ansatz

    .. math::

       L^{-\zeta/\nu} a_{ij} = \tilde{f}\left( L^{1/\nu} (\varrho_j -
       \varrho_c) \right).

    The output is the scaled data points :math:`(x_{ij}, y_{ij}, dy_{ij})`
    with

    .. math::

       x_{ij} & = L_i^{1/\nu} (\varrho_j - \varrho_c) \\
       y_{ij} & = L_i^{-\zeta/\nu} a_{ij} \\
       dy_{ij} & = L_i^{-\zeta/\nu} da_{ij}

    such that all data points :ref:`collapse <data-collapse-method>` onto the
    single curve :math:`\tilde{f}(x)` with the right choice of
    :math:`\varrho_c, \nu, \zeta` [4]_ [5]_.

    Raises
    ------
    ValueError
        If `l` or `rho` is not 1-D array_like, if `a` or `da` is not 2-D
        array_like, if the shape of `a` or `da` differs from
        ``(l.size, rho.size)``

    References
    ----------
    .. [4] M. E. J. Newman and G. T. Barkema, Monte Carlo Methods in
       Statistical Physics (Oxford University Press, 1999)

    .. [5] K. Binder and D. W. Heermann, `Monte Carlo Simulation in
       Statistical Physics <http://dx.doi.org/10.1007/978-3-642-03163-2>`_
       (Springer, Berlin, Heidelberg, 2010)
    '''

    # l should be 1-D array_like
    l = np.asanyarray(l)
    if l.ndim != 1:
        raise ValueError("l should be 1-D array_like")

    # rho should be 1-D array_like
    rho = np.asanyarray(rho)
    if rho.ndim != 1:
        raise ValueError("rho should be 1-D array_like")

    # a should be 2-D array_like
    a = np.asanyarray(a)
    if a.ndim != 2:
        raise ValueError("a should be 2-D array_like")

    # a should have shape (l.size, rho.size)
    if a.shape != (l.size, rho.size):
        raise ValueError("a should have shape (l.size, rho.size)")

    # da should be 2-D array_like
    da = np.asanyarray(da)
    if da.ndim != 2:
        raise ValueError("da should be 2-D array_like")

    # da should have shape (l.size, rho.size)
    if da.shape != (l.size, rho.size):
        raise ValueError("da should have shape (l.size, rho.size)")

    # rho_c should be float
    rho_c = float(rho_c)

    # rho_c should be in range
    if rho_c > rho.max() or rho_c < rho.min():
        warnings.warn("rho_c is out of range", RuntimeWarning)

    # nu should be float
    nu = float(nu)

    # zeta should be float
    zeta = float(zeta)

    l_mesh, rho_mesh = np.meshgrid(l, rho, indexing='ij')

    x = np.power(l_mesh, 1. / nu) * (rho_mesh - rho_c)
    y = np.power(l_mesh, - zeta / nu) * a
    dy = np.power(l_mesh, - zeta / nu) * da

    return ScaledData(x, y, dy)

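
# A minimal usage sketch (not part of the original module): collapsing
# synthetic data that obeys the scaling ansatz with rho_c = 1, nu = 2,
# zeta = 0. The sigmoid master curve, the helper name _demo_scaledata, and
# all parameter values here are assumptions chosen for illustration only.
def _demo_scaledata():
    import numpy as np
    l = np.array([10., 100., 1000.])
    rho = np.linspace(0.9, 1.1, 50)
    l_mesh, rho_mesh = np.meshgrid(l, rho, indexing='ij')
    # synthetic observable generated from a known master curve
    a = 1. / (1. + np.exp(- np.power(l_mesh, 0.5) * (rho_mesh - 1.)))
    da = np.full_like(a, 1e-3)
    scaled = scaledata(l, rho, a, da, rho_c=1.0, nu=2.0, zeta=0.0)
    # with the correct exponents, every row of scaled.y traces the same
    # master curve as a function of scaled.x
    return scaled
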
def _wls_linearfit_predict(x, w, wx, wy, wxx, wxy, select):
    """
    Predict a point according to a weighted least squares linear fit of the
    data

    This function is a helper function for :py:func:`quality`. It is not
    supposed to be called directly.

    Parameters
    ----------
    x : float
        The position for which to predict the function value

    w : ndarray
        The pre-calculated weights :math:`w_l`

    wx : ndarray
        The pre-calculated weighted `x` data :math:`w_l x_l`

    wy : ndarray
        The pre-calculated weighted `y` data :math:`w_l y_l`

    wxx : ndarray
        The pre-calculated weighted :math:`x^2` data :math:`w_l x_l^2`

    wxy : ndarray
        The pre-calculated weighted `x y` data :math:`w_l x_l y_l`

    select : indexing array
        To select the subset from the `w`, `wx`, `wy`, `wxx`, `wxy` data

    Returns
    -------
    float, float
        The estimated value of the master curve for the selected subset and
        the squared standard error
    """

    # linear fit
    k = w[select].sum()
    kx = wx[select].sum()
    ky = wy[select].sum()
    kxx = wxx[select].sum()
    kxy = wxy[select].sum()

    delta = k * kxx - kx ** 2
    m = 1. / delta * (k * kxy - kx * ky)
    b = 1. / delta * (kxx * ky - kx * kxy)
    b_var = kxx / delta
    m_var = k / delta
    bm_covar = - kx / delta

    # estimation
    y = b + m * x
    dy2 = b_var + 2 * bm_covar * x + m_var * x ** 2

    return y, dy2


def _jprimes(x, i, x_bounds=None):
    r"""
    Helper function to return the j' indices for the master curve fit

    This function is a helper function for :py:func:`quality`. It is not
    supposed to be called directly.

    Parameters
    ----------
    x : 2-D ndarray
        The x values.

    i : int
        The row index (finite size index)

    x_bounds : 2-tuple, optional
        bounds on x values

    Returns
    -------
    ret : 2-D ndarray
        Has the same shape as `x`.
        Its element ``ret[i'][j]`` is the j' such that
        :math:`x_{i'j'} \leq x_{ij} < x_{i'(j'+1)}`.
        If no such j' exists, the element is np.nan.
        Convert the element to int to use as an index.
    """

    j_primes = - np.ones_like(x)

    try:
        x_masked = ma.masked_outside(x, x_bounds[0], x_bounds[1])
    except (TypeError, IndexError):
        x_masked = ma.asanyarray(x)

    k, n = x.shape

    # indices of lower and upper bounds
    edges = ma.notmasked_edges(x_masked, axis=1)
    x_lower = np.zeros(k, dtype=int)
    x_upper = np.zeros(k, dtype=int)
    x_lower[edges[0][0]] = edges[0][-1]
    x_upper[edges[-1][0]] = edges[-1][-1]

    for i_prime in range(k):
        if i_prime == i:
            j_primes[i_prime][:] = np.nan
            continue

        jprimes = np.searchsorted(
            x[i_prime], x[i], side='right'
        ).astype(float) - 1
        jprimes[
            np.logical_or(
                jprimes < x_lower[i_prime],
                jprimes >= x_upper[i_prime]
            )
        ] = np.nan

        j_primes[i_prime][:] = jprimes

    return j_primes


def _select_mask(j, j_primes):
    """
    Return a boolean mask for selecting the data subset according to the j'

    Parameters
    ----------
    j : int
        current j index

    j_primes : ndarray
        result from _jprimes call
    """

    ret = np.zeros_like(j_primes, dtype=bool)
    my_iprimes = np.invert(np.isnan(j_primes[:, j])).nonzero()[0]
    my_jprimes = j_primes[my_iprimes, j]
    my_jprimes = my_jprimes.astype(int)
    ret[my_iprimes, my_jprimes] = True
    ret[my_iprimes, my_jprimes + 1] = True

    return ret

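
# Illustrative sketch (not part of the original module): calling
# _wls_linearfit_predict on toy data with the same pre-calculated weighted
# sums that quality() builds, and cross-checking the prediction against
# numpy's weighted polyfit. The data and the helper name _demo_wls_predict
# are hypothetical, for demonstration only.
def _demo_wls_predict():
    import numpy as np
    rng = np.random.RandomState(0)
    x = np.linspace(0., 1., 20)
    dy = np.full_like(x, 0.05)
    y = 0.3 + 2.0 * x + rng.randn(x.size) * dy
    # weights w_l = 1 / dy_l^2, pre-calculated as in quality()
    w = dy ** (-2)
    select = np.ones_like(x, dtype=bool)  # select all points
    y_hat, dy2_hat = _wls_linearfit_predict(
        x=0.5, w=w, wx=w * x, wy=w * y, wxx=w * x * x, wxy=w * x * y,
        select=select,
    )
    # np.polyfit weights multiply the residuals, so pass 1/dy (not 1/dy**2)
    coeffs = np.polyfit(x, y, 1, w=1. / dy)
    assert np.isclose(y_hat, np.polyval(coeffs, 0.5))
    return y_hat, np.sqrt(dy2_hat)
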
def quality(x, y, dy, x_bounds=None):
    r'''
    Quality of data collapse onto a master curve defined by the data

    This is the reduced chi-square statistic for a data fit except that the
    master curve is fitted from the data itself.

    Parameters
    ----------
    x, y, dy : 2-D array_like
        output from :py:func:`scaledata`, scaled data `x`, `y` with standard
        errors `dy`

    x_bounds : tuple of floats, optional
        lower and upper bound for scaled data `x` to consider

    Returns
    -------
    float
        the quality of the data collapse

    Raises
    ------
    ValueError
        if not all arrays `x`, `y`, `dy` have dimension 2, or if not all
        arrays are of the same shape, or if `x` is not sorted along rows
        (``axis=1``), or if `dy` does not have only positive entries

    Notes
    -----
    This is the implementation of the reduced :math:`\chi^2` quality function
    :math:`S` by Houdayer & Hartmann [6]_.
    It should attain a minimum of around :math:`1` for an optimal fit, and be
    much larger otherwise.

    For further information, see the :ref:`quality-function` section in the
    manual.

    References
    ----------
    .. [6] J. Houdayer and A. Hartmann, Physical Review B 70, 014418+ (2004)
       `doi:10.1103/physrevb.70.014418
       <http://dx.doi.org/doi:10.1103/physrevb.70.014418>`_
    '''

    # arguments should be 2-D array_like
    x = np.asanyarray(x)
    y = np.asanyarray(y)
    dy = np.asanyarray(dy)

    args = {"x": x, "y": y, "dy": dy}
    for arg_name, arg in args.items():
        if arg.ndim != 2:
            raise ValueError("{} should be 2-D array_like".format(arg_name))

    # arguments should have all the same shape
    if not x.shape == y.shape == dy.shape:
        raise ValueError("arguments should be of same shape")

    # x should be sorted for all system sizes l
    if not np.array_equal(x, np.sort(x, axis=1)):
        raise ValueError("x should be sorted for each system size")

    # dy should have only positive entries
    if not np.all(dy > 0.0):
        raise ValueError("dy should have only positive values")

    # first dimension: system sizes l
    # second dimension: parameter values rho
    k, n = x.shape

    # pre-calculate weights and other matrices
    w = dy ** (-2)
    wx = w * x
    wy = w * y
    wxx = w * x * x
    wxy = w * x * y

    # calculate master curve estimates
    master_y = np.zeros_like(y)
    master_y[:] = np.nan
    master_dy2 = np.zeros_like(dy)
    master_dy2[:] = np.nan

    # loop through system sizes
    for i in range(k):

        j_primes = _jprimes(x=x, i=i, x_bounds=x_bounds)

        # loop through x values
        for j in range(n):

            # discard x value if it is out of bounds
            try:
                if not x_bounds[0] <= x[i][j] <= x_bounds[1]:
                    continue
            except (TypeError, IndexError):
                # x_bounds is None: no bounds to apply
                pass

            # boolean mask for selected data x_l, y_l, dy_l
            select = _select_mask(j=j, j_primes=j_primes)

            if not select.any():
                # no data to select
                # master curve estimate Y_ij remains undefined
                continue

            # master curve estimate
            master_y[i, j], master_dy2[i, j] = _wls_linearfit_predict(
                x=x[i, j], w=w, wx=wx, wy=wy, wxx=wxx, wxy=wxy, select=select
            )

    # average within finite system sizes first
    return np.nanmean(
        np.nanmean(
            (y - master_y) ** 2 / (dy ** 2 + master_dy2),
            axis=1
        )
    )

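
# Illustrative sketch (not part of the original module): the quality S is
# close to 1 for a good collapse and grows for a poor one. The synthetic
# master curve, the helper name _demo_quality, and all parameter values are
# assumptions for demonstration only.
def _demo_quality():
    import numpy as np
    rng = np.random.RandomState(42)
    l = np.array([10., 100., 1000.])
    rho = np.linspace(0.9, 1.1, 50)
    l_mesh, rho_mesh = np.meshgrid(l, rho, indexing='ij')
    a = 1. / (1. + np.exp(- np.power(l_mesh, 0.5) * (rho_mesh - 1.)))
    da = np.full_like(a, 0.01)
    a += rng.randn(*a.shape) * da
    # correct parameters (rho_c=1, nu=2, zeta=0): near-optimal collapse
    s_good = quality(*scaledata(l, rho, a, da, 1.0, 2.0, 0.0))
    # wrong critical point and exponent: clearly worse collapse
    s_bad = quality(*scaledata(l, rho, a, da, 1.05, 1.0, 0.0))
    return s_good, s_bad  # expect s_good ~ 1 and s_bad >> s_good
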
def _neldermead_errors(sim, fsim, fun):
    """
    Estimate the errors from the final simplex of the Nelder--Mead algorithm

    This is a helper function and not supposed to be called directly.

    Parameters
    ----------
    sim : ndarray
        the final simplex

    fsim : ndarray
        the function values at the vertices of the final simplex

    fun : callable
        the goal function to minimize
    """

    # fit quadratic coefficients
    n = len(sim) - 1

    ymin = fsim[0]

    sim = np.copy(sim)
    fsim = np.copy(fsim)

    centroid = np.mean(sim, axis=0)
    fcentroid = fun(centroid)

    # enlarge distance of simplex vertices from centroid until all have at
    # least an absolute function value distance of 0.01
    for i in range(n + 1):
        while np.abs(fsim[i] - fcentroid) < 0.01:
            sim[i] += sim[i] - centroid
            fsim[i] = fun(sim[i])

    # the vertices and the midpoints x_ij
    x = 0.5 * (
        sim[np.mgrid[0:n + 1, 0:n + 1]][1] +
        sim[np.mgrid[0:n + 1, 0:n + 1]][0]
    )

    y = np.nan * np.ones(shape=(n + 1, n + 1))
    for i in range(n + 1):
        y[i, i] = fsim[i]
        for j in range(i + 1, n + 1):
            y[i, j] = y[j, i] = fun(x[i, j])

    y0i = y[np.mgrid[0:n + 1, 0:n + 1]][0][1:, 1:, 0]

    y0j = y[np.mgrid[0:n + 1, 0:n + 1]][0][0, 1:, 1:]

    b = 2 * (y[1:, 1:] + y[0, 0] - y0i - y0j)

    q = (sim - sim[0])[1:].T

    varco = ymin * np.dot(q, np.dot(np.linalg.inv(b), q.T))
    return np.sqrt(np.diag(varco)), varco

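
# Illustrative sketch (not part of the original module): for an exactly
# quadratic goal function the error estimate reproduces the curvature
# scales of the minimum. The quadratic bowl below and the helper name
# _demo_neldermead_errors are hypothetical, for demonstration only.
def _demo_neldermead_errors():
    import numpy as np

    def fun(v):
        # quadratic bowl with minimum value 1 at (1, -2) and curvature
        # scales 0.5 and 0.1 along the two axes
        return 1.0 + ((v[0] - 1.0) / 0.5) ** 2 + ((v[1] + 2.0) / 0.1) ** 2

    # a small simplex around the minimum, minimum vertex first
    sim = np.array([[1.0, -2.0], [1.1, -2.0], [1.0, -1.95]])
    fsim = np.array([fun(v) for v in sim])
    errors, varco = _neldermead_errors(sim, fsim, fun)
    # for this chi-square-like fun with minimum value 1, the estimated
    # errors come out close to (0.5, 0.1)
    return errors, varco
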
def autoscale(l, rho, a, da, rho_c0, nu0, zeta0, x_bounds=None, **kwargs):
    """
    Automatically scale finite-size data and fit critical point and exponents

    Parameters
    ----------
    l, rho, a, da : array_like
        input for the :py:func:`scaledata` function

    rho_c0, nu0, zeta0 : float
        initial guesses for the critical point and exponents

    x_bounds : tuple of floats, optional
        lower and upper bound for scaled data `x` to consider

    Returns
    -------
    res : OptimizeResult

    res['success'] : bool
        Indicates whether the optimization algorithm has terminated
        successfully.

    res['x'] : ndarray

    res['rho'], res['nu'], res['zeta'] : float
        The fitted critical point and exponents,
        ``res['x'] == [res['rho'], res['nu'], res['zeta']]``

    res['drho'], res['dnu'], res['dzeta'] : float
        The respective standard errors derived from fitting the curvature at
        the minimum, ``res['errors'] == [res['drho'], res['dnu'],
        res['dzeta']]``.

    res['errors'], res['varco'] : ndarray
        The standard errors as a vector, and the full variance--covariance
        matrix (the diagonal entries of which are the squared standard
        errors), ``np.sqrt(np.diag(res['varco'])) == res['errors']``

    See also
    --------
    scaledata
        For the `l`, `rho`, `a`, `da` input parameters

    quality
        The goal function of the optimization

    scipy.optimize.minimize
        The optimization wrapper routine

    scipy.optimize.OptimizeResult
        The return type

    Notes
    -----
    This implementation uses the quality function by Houdayer & Hartmann [8]_
    which measures the quality of the data collapse, see the sections
    :ref:`data-collapse-method` and :ref:`quality-function` in the manual.

    This function and the whole fssa package have been inspired by Oliver
    Melchert and his superb **autoScale** package [9]_.

    The critical point and exponents, including their standard errors and
    (co)variances, are fitted by the Nelder--Mead algorithm, see the section
    :ref:`neldermead` in the manual.

    References
    ----------
    .. [8] J. Houdayer and A. Hartmann, Physical Review B 70, 014418+ (2004)
       `doi:10.1103/physrevb.70.014418
       <http://dx.doi.org/doi:10.1103/physrevb.70.014418>`_

    .. [9] O. Melchert, `arXiv:0910.5403 <http://arxiv.org/abs/0910.5403>`_
       (2009)

    Examples
    --------
    >>> # generate artificial scaling data from master curve
    >>> # with rho_c == 1.0, nu == 2.0, zeta == 0.0
    >>> import fssa
    >>> import numpy as np
    >>> l = [10, 100, 1000]
    >>> rho = np.linspace(0.9, 1.1)
    >>> l_mesh, rho_mesh = np.meshgrid(l, rho, indexing='ij')
    >>> master_curve = lambda x: 1. / (1. + np.exp(-x))
    >>> x = np.power(l_mesh, 0.5) * (rho_mesh - 1.)
    >>> y = master_curve(x)
    >>> dy = y / 100.
    >>> y += np.random.randn(*y.shape) * dy
    >>> a = y
    >>> da = dy
    >>>
    >>> # run autoscale
    >>> res = fssa.autoscale(l=l, rho=rho, a=a, da=da, rho_c0=0.9, nu0=2.0, zeta0=0.0)
    """

    def goal_function(x):
        my_x, my_y, my_dy = scaledata(
            rho=rho, l=l, a=a, da=da, nu=x[1], zeta=x[2], rho_c=x[0],
        )
        return quality(
            my_x, my_y, my_dy, x_bounds=x_bounds,
        )

    ret = scipy.optimize.minimize(
        goal_function,
        [rho_c0, nu0, zeta0],
        method=_minimize_neldermead,
        options={
            'xtol': 1e-2,
            'ftol': 1e-2,
        }
    )

    errors, varco = _neldermead_errors(
        sim=ret['final_simplex'][0],
        fsim=ret['final_simplex'][1],
        fun=goal_function,
    )
    ret['varco'] = varco
    ret['errors'] = errors
    ret['rho'], ret['nu'], ret['zeta'] = ret['x']
    ret['drho'], ret['dnu'], ret['dzeta'] = ret['errors']

    return ret

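
# Hypothetical follow-up (not part of the original module): once autoscale
# has returned, the fitted values and their standard errors can be reported
# like this. The helper name _report_autoscale_result is an assumption.
def _report_autoscale_result(res):
    if not res['success']:
        print("optimization failed: {}".format(res.get('message', '')))
        return
    print("rho_c = {:.4f} +/- {:.4f}".format(res['rho'], res['drho']))
    print("nu    = {:.4f} +/- {:.4f}".format(res['nu'], res['dnu']))
    print("zeta  = {:.4f} +/- {:.4f}".format(res['zeta'], res['dzeta']))
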