xerus
a general purpose tensor library
steepestDescent.cpp
Go to the documentation of this file.
1 // Xerus - A General Purpose Tensor Library
2 // Copyright (C) 2014-2017 Benjamin Huber and Sebastian Wolf.
3 //
4 // Xerus is free software: you can redistribute it and/or modify
5 // it under the terms of the GNU Affero General Public License as published
6 // by the Free Software Foundation, either version 3 of the License,
7 // or (at your option) any later version.
8 //
9 // Xerus is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 // GNU Affero General Public License for more details.
13 //
14 // You should have received a copy of the GNU Affero General Public License
15 // along with Xerus. If not, see <http://www.gnu.org/licenses/>.
16 //
17 // For further information on Xerus visit https://libXerus.org
18 // or contact us at contact@libXerus.org.
19 
26 #include <xerus/algorithms/als.h>
27 #include <xerus/basic.h>
28 #include <xerus/index.h>
30 #include <xerus/tensorNetwork.h>
31 
34 
35 namespace xerus {
36 
	/// @brief Line search for Riemannian optimization on the manifold of fixed TT-rank tensors.
	/// @details First tries a stepsize LARGER than @a _alpha and keeps shrinking it by the factor
	///          @a _changeInAlpha (default 0.5, per the declaration) as long as the residual improves,
	///          always retracting from the original point. Afterwards an Armijo backtracking pass
	///          shrinks the accepted stepsize further until the sufficient-decrease condition holds.
	/// @param _x [in,out] current iterate; on return the point reached with the final stepsize.
	/// @param _alpha [in,out] initial stepsize guess; on return the accepted stepsize.
	/// @param _direction tangent-vector search direction; internally scaled by 1/frob_norm so that
	///        alpha is measured relative to a unit-norm step.
	/// @param _derivative directional derivative entering the Armijo sufficient-decrease test.
	/// @param _residual [in,out] residual at the incoming _x; on return the residual at the new _x.
	/// @param _retraction maps (point, tangent vector) back onto the manifold, modifying the point in place.
	/// @param _calculate_residual nullary functor returning the residual at the CURRENT _x
	///        (presumably captures _x by reference in the caller -- it takes no arguments).
	/// @param _changeInAlpha multiplicative stepsize factor, expected in (0,1).
	void line_search(TTTensor &_x, value_t &_alpha, const TTTangentVector &_direction, value_t _derivative, value_t &_residual,
					 std::function<void(TTTensor &, const TTTangentVector &)> _retraction,
					 std::function<value_t()> _calculate_residual,
					 value_t _changeInAlpha)
	{
		value_t dirNorm = _direction.frob_norm();
		// Start one factor ABOVE _alpha (division by _changeInAlpha < 1 enlarges the step),
		// so the first loop iteration below begins the trials exactly at _alpha.
		value_t currAlpha = _alpha / _changeInAlpha;
		TTTensor oldX(_x); // keep the starting point; every retraction is applied from here
		_retraction(_x, currAlpha/dirNorm * _direction);
		value_t bestResidual = _calculate_residual();
		value_t bestAlpha = currAlpha;
		TTTensor bestX = _x;
		
		// Shrink the stepsize geometrically while the residual keeps improving;
		// stop at the first trial that is not strictly better than the current best.
		do {
			currAlpha *= _changeInAlpha;
			_x = oldX;
			_retraction(_x, currAlpha/dirNorm * _direction);
			value_t newResidual = _calculate_residual();
//			LOG(line_search, currAlpha << " -> " << newResidual);
			if (newResidual < bestResidual) {
				bestResidual = newResidual;
				bestAlpha = currAlpha;
				bestX = _x;
			} else {
				break;
			}
		} while(true);
		
		// Adopt the best trial point and stepsize found above.
		_x = std::move(bestX);
		_alpha = bestAlpha;
		
		// armijo backtracking: demand bestResidual <= _residual - c * (alpha/|dir|) * derivative,
		// otherwise keep shrinking alpha (down to 1e-16) and re-evaluate from the start point.
		const value_t min_decrease = 1e-4;
//		LOG(armijo, bestResidual << " > " << _residual << " - " << min_decrease << " * " << _alpha << " * " << _derivative << " / " << dirNorm);
		while (_alpha > 1e-16 && bestResidual > _residual - min_decrease * _alpha/dirNorm * _derivative) {
//			LOG(armijo, _alpha << " " << bestResidual);
			_alpha *= _changeInAlpha;
			_x = oldX;
			_retraction(_x, _alpha/dirNorm * _direction);
			bestResidual = _calculate_residual();
		}
		
		// Report the residual belonging to the finally accepted _x / _alpha.
		_residual = bestResidual;
	}
81 
82 
83  value_t SteepestDescentVariant::solve(const TTOperator *_Ap, TTTensor &_x, const TTTensor &_b, size_t _numSteps, value_t _convergenceEpsilon, PerformanceData &_perfData) const {
84  const TTOperator &_A = *_Ap;
85  static const Index i,j;
86  size_t stepCount=0;
87  TTTensor residual;
88  value_t lastResidual=1e100;
89  value_t currResidual=1e100;
90 // value_t normB = frob_norm(_b);
91 
92 
93  if (_Ap != nullptr) {
94  _perfData << "Steepest Descent for ||A*x - b||^2, x.dimensions: " << _x.dimensions << '\n'
95  << "A.ranks: " << _A.ranks() << '\n';
97  _perfData << " with symmetric positive definite Operator A\n";
98  }
99  } else {
100  _perfData << "Steepest Descent for ||x - b||^2, x.dimensions: " << _x.dimensions << '\n';
101  }
102  _perfData << "x.ranks: " << _x.ranks() << '\n'
103  << "b.ranks: " << _b.ranks() << '\n'
104  << "maximum number of steps: " << _numSteps << '\n'
105  << "convergence epsilon: " << _convergenceEpsilon << '\n';
106  _perfData.start();
107 
108  auto updateResidual = [&]() {
109  if (_Ap != nullptr) {
110  residual(i&0) = _b(i&0) - _A(i/2,j/2)*_x(j&0);
111  } else {
112  residual = _b - _x;
113  }
114  currResidual = frob_norm(residual);
115  };
116 
117  auto updatePerfdata = [&]() {
118  _perfData.add(currResidual);
119  };
120  updateResidual();
121  updatePerfdata();
122 
123  TTTensor y, Ay;
124  value_t alpha = 1;
125  while ((_numSteps == 0 || stepCount < _numSteps)
126  && currResidual > _convergenceEpsilon
127  && std::abs(lastResidual-currResidual) > _convergenceEpsilon
128  && std::abs(1-currResidual/lastResidual) > _convergenceEpsilon)
129  {
130  stepCount += 1;
131 
132  if (_Ap != nullptr) {
134  // search direction: y = b-Ax
135  y = residual;
136  if (preconditioner != nullptr) {
137  y(j&0) = (*preconditioner)(j/2,i/2) * y(i&0);
138  }
139  // direction of change A*y
140 // Ay(i&0) = _A(i/2,j/2) * y(j&0);
141  // "optimal" stepsize alpha = <y,y>/<y,Ay>
142 // alpha = misc::sqr(frob_norm(y)) / value_t(y(i&0)*Ay(i&0));
143  } else { XERUS_REQUIRE_TEST;
144  // search direction: y = A^T(b-Ax)
145  y(i&0) = _A(j/2,i/2) * residual(j&0);
146  if (preconditioner != nullptr) {
147  y(j&0) = (*preconditioner)(j/2,i/2) * y(i&0);
148  }
149  // direction of change A*y
150 // Ay(i&0) = _A(i/2,j/2) * y(j&0);
151  // "optimal" stepsize alpha = <y,y>/<Ay,Ay>
152 // alpha = misc::sqr(frob_norm(y)) / misc::sqr(frob_norm(Ay));
153  }
154  } else {
155  y = residual;
156  }
157 
158  TTTensor oldX(_x);
159  alpha *= 2;
160  retraction(_x, y * alpha);
161  lastResidual = currResidual;
162  updateResidual();
163  // armijo backtracking
164  while (alpha > 1e-30 && lastResidual < currResidual){// - 1e-4 * alpha * value_t(residual(i&0) * y(i&0))) {
165 // LOG(huch, alpha << " " << currResidual);
166  alpha /= 2;
167  _x = oldX;
168  retraction(_x, y * alpha);
169  updateResidual();
170  }
171 
172  updatePerfdata();
173  }
174 
175  return currResidual;
176  }
177 
179 } // namespace xerus
Header file for the Index class.
#define XERUS_REQUIRE_TEST
Definition: check.h:51
Header file for the IndexedTensorMoveable class.
Header file for the steepest descent algorithms.
Header file defining lists of indexed tensors.
Header file for the classes defining factorisations of Tensors.
class to compactly represent tangent vectors of the manifold of constant TT-rank
Definition: retractions.h:33
The main namespace of xerus.
Definition: basic.h:37
Header file for the ALS algorithm and its variants.
Wrapper class for all steepest descent variants.
double solve(const TTOperator *_Ap, TTTensor &_x, const TTTensor &_b, size_t _numSteps, value_t _convergenceEpsilon, PerformanceData &_perfData=NoPerfData) const
Storage class for the performance data collected during an algorithm (typically iteration count...
value_t frob_norm() const
std::vector< size_t > ranks() const
Gets the ranks of the TTNetwork.
Definition: ttNetwork.cpp:717
static XERUS_force_inline value_t frob_norm(const Tensor &_tensor)
Calculates the frobenius norm of the given tensor.
Definition: tensor.h:931
TTRetractionII retraction
the retraction to project from point + tangent vector to a new point on the manifold ...
Header file for shorthand notations that are xerus specific but used throughout the library...
double value_t
The type of values to be used by xerus.
Definition: basic.h:43
Class used to represent indices that can be used to write tensor calculations in index notation...
Definition: index.h:43
const SteepestDescentVariant SteepestDescent
default variant of the steepest descent algorithm using the lapack solver
void line_search(TTTensor &_x, value_t &_alpha, const TTTangentVector &_direction, value_t _derivative, value_t &_residual, std::function< void(TTTensor &, const TTTangentVector &)> _retraction, std::function< value_t()> _calculate_residual, value_t _changeInAlpha=0.5)
bool assumeSymmetricPositiveDefiniteOperator
calculates the gradient as b-Ax instead of A^T(b-Ax)
void SubmanifoldRetractionII(TTTensor &_U, const TTTensor &_change)
retraction that performs componentwise addition of and where is the i-th component of the riemanni...
Header file for the TensorNetwork class.
void add(const size_t _itrCount, const value_t _residual, const TensorNetwork::RankTuple _ranks=TensorNetwork::RankTuple(), const size_t _flags=0)
std::vector< size_t > dimensions
Dimensions of the external indices, i.e. the dimensions of the tensor represented by the network...