Source code for descent.main

from . import proxops
from .utils import destruct, restruct, wrap

import sys
from time import perf_counter
from collections import namedtuple, defaultdict
from itertools import count
from functools import wraps

import numpy as np
from scipy.optimize import OptimizeResult
from toolz import compose
import tableprint as tp

__all__ = ['Consensus', 'gradient_optimizer']


class Optimizer:
    def restruct(self, x):
        if 'theta' not in self.__dict__:
            raise KeyError('theta not defined')
        return restruct(x, self.theta)

    def optional_print(self, message):
        if self.display:
            self.display.write(message + "\n")
            self.display.flush()

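# Note (illustrative, not part of descent.main): Optimizer.restruct maps a flat
# parameter vector back into the structure of self.theta via
# descent.utils.restruct. A rough sketch of the round trip, assuming the
# parameters are a dict of arrays (see descent.utils for the exact semantics):
#
#     theta = {'W': np.ones((3, 2)), 'b': np.zeros(2)}
#     flat = destruct(theta)          # one 1-D vector containing every entry
#     same = restruct(flat, theta)    # back to {'W': ..., 'b': ...}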

class Consensus(Optimizer):

    def __init__(self, tau=(10., 2., 2.), tol=(1e-6, 1e-3)):
        """
        Proximal Consensus (ADMM)

        Parameters
        ----------
        tau : (float, float, float)
            ADMM penalty schedule. The augmented Lagrangian quadratic penalty
            parameter, rho, is initialized to tau[0]. Depending on the primal
            and dual residuals, rho is increased by a factor of tau[1] or
            decreased by a factor of tau[2] at every iteration.
            (See Boyd et al. 2011 for details.)

        tol : (float, float)
            Primal and dual residual tolerances
        """
        self.operators = []
        self.tau = namedtuple('tau', ('init', 'inc', 'dec'))(*tau)
        self.tol = namedtuple('tol', ('primal', 'dual'))(*tol)

    def add(self, operator, *args):
        """Adds a proximal operator to the list of operators"""

        if isinstance(operator, str):
            op = getattr(proxops, operator)(*args)
        elif isinstance(operator, proxops.ProximalOperatorBaseClass):
            op = operator
        else:
            raise ValueError("operator must be a string or a "
                             "ProximalOperatorBaseClass instance")

        self.operators.append(op)
        return self

    def minimize(self, x0, display=None, maxiter=np.Inf):

        self.theta = x0
        primals = [destruct(x0) for _ in self.operators]
        duals = [np.zeros_like(p) for p in primals]

        rho = self.tau.init
        resid = defaultdict(list)

        try:
            for k in count():

                # store the parameters from the previous iteration
                theta_prev = destruct(self.theta)

                # update each primal variable copy
                primals = [op(self.restruct(theta_prev - dual), rho).ravel()
                           for op, dual in zip(self.operators, duals)]

                # average primal copies
                theta_avg = np.mean(primals, axis=0)

                # update the dual variables (after the primal update has finished)
                duals = [dual + primal - theta_avg
                         for dual, primal in zip(duals, primals)]

                # compute primal and dual residuals
                primal_resid = float(np.sum([np.linalg.norm(primal - theta_avg)
                                             for primal in primals]))
                dual_resid = len(self.operators) * rho ** 2 * \
                    np.linalg.norm(theta_avg - theta_prev)

                # update the penalty parameter according to the primal and dual
                # residuals (see sect. 3.4.1 of the Boyd and Parikh ADMM paper)
                if primal_resid > self.tau.init * dual_resid:
                    rho *= float(self.tau.inc)
                elif dual_resid > self.tau.init * primal_resid:
                    rho /= float(self.tau.dec)

                resid['primal'].append(primal_resid)
                resid['dual'].append(dual_resid)
                resid['rho'].append(rho)

                # store the consensus average so that theta_prev really is the
                # previous iterate on the next pass through the loop
                self.theta = self.restruct(theta_avg)

                # check for convergence
                if (primal_resid <= self.tol.primal) and (dual_resid <= self.tol.dual):
                    break

                if k >= maxiter:
                    break

        except KeyboardInterrupt:
            pass

        return OptimizeResult({
            'x': self.restruct(theta_avg),
            'k': k,
        })
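

# ---------------------------------------------------------------------------
# Example (illustrative sketch, not part of descent.main): solving a small
# sparse regression problem by consensus ADMM. The operator names 'linsys'
# and 'sparse' are assumptions about what descent.proxops provides; swap in
# whichever proximal operators your proxops module actually defines.
#
#     import numpy as np
#     from descent.main import Consensus
#
#     A = np.random.randn(20, 5)
#     y = A @ np.random.randn(5)
#
#     opt = Consensus(tau=(10., 2., 2.), tol=(1e-6, 1e-3))
#     opt.add('linsys', A, y)     # least-squares data term (assumed name/signature)
#     opt.add('sparse', 0.1)      # l1 penalty (assumed name/signature)
#     res = opt.minimize(np.zeros(5), maxiter=500)
#     x_hat = res.x               # consensus estimate after the ADMM iterations
# ---------------------------------------------------------------------------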


def gradient_optimizer(coro):
    """Turns a coroutine into a gradient based optimizer."""

    class GradientOptimizer(Optimizer):

        @wraps(coro)
        def __init__(self, *args, **kwargs):
            self.algorithm = coro(*args, **kwargs)
            self.algorithm.send(None)
            self.operators = []

        def set_transform(self, func):
            self.transform = compose(destruct, func, self.restruct)

        def minimize(self, f_df, x0, display=sys.stdout, maxiter=1e3):

            self.display = display
            self.theta = x0

            # prime the algorithm with the (flattened) initial parameters
            xk = self.algorithm.send(destruct(x0).copy())
            store = defaultdict(list)
            runtimes = []
            if len(self.operators) == 0:
                self.operators = [proxops.identity()]

            # build the objective/gradient functions and the transform that
            # applies each proximal operator after every update
            obj, grad = wrap(f_df, x0)
            transform = compose(destruct, *reversed(self.operators), self.restruct)

            self.optional_print(tp.header(['Iteration', 'Objective', '||Grad||', 'Runtime']))
            try:
                for k in count():

                    # compute the objective and gradient, then take a step
                    tstart = perf_counter()
                    f = obj(xk)
                    df = grad(xk)
                    xk = transform(self.algorithm.send(df))
                    runtimes.append(perf_counter() - tstart)
                    store['f'].append(f)

                    # update the display
                    self.optional_print(tp.row([k, f, np.linalg.norm(destruct(df)),
                                                tp.humantime(runtimes[-1])]))

                    if k >= maxiter:
                        break

            except KeyboardInterrupt:
                pass

            self.optional_print(tp.bottom(4))

            # cleanup
            self.optional_print(u'\u279b Final objective: {}'.format(store['f'][-1]))
            self.optional_print(u'\u279b Total runtime: {}'.format(tp.humantime(sum(runtimes))))
            self.optional_print(u'\u279b Per iteration runtime: {} +/- {}'.format(
                tp.humantime(np.mean(runtimes)),
                tp.humantime(np.std(runtimes)),
            ))

            # result
            return OptimizeResult({
                'x': self.restruct(xk),
                'f': f,
                'df': self.restruct(df),
                'k': k,
                'obj': np.array(store['f']),
            })

    return GradientOptimizer
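

# ---------------------------------------------------------------------------
# Example (illustrative sketch, not part of descent.main): building a new
# optimizer with the gradient_optimizer decorator. The coroutine protocol is
# inferred from minimize() above: after priming, the coroutine first receives
# the flattened initial parameters, then repeatedly yields the current
# parameters and receives the gradient. The name 'sgd', the learning rate,
# and the toy objective below are illustrative, not part of this module.
#
#     @gradient_optimizer
#     def sgd(lr=1e-2):
#         xk = yield                   # receive the flattened initial parameters
#         while True:
#             gradient = yield xk      # hand back current params, receive gradient
#             xk = xk - lr * destruct(gradient)   # destruct() flattens, in case
#                                                 # the gradient arrives structured
#
#     def f_df(x):
#         """Objective and gradient of f(x) = 0.5 * ||x||^2."""
#         return 0.5 * np.sum(x ** 2), x
#
#     opt = sgd(lr=0.1)
#     res = opt.minimize(f_df, np.random.randn(10), maxiter=100)
#     print(res.f, res.k)
# ---------------------------------------------------------------------------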