LCOV - code coverage report
Current view: top level - routines - manifold_sculpting.hpp (source / functions) Hit Total Coverage
Test: clean.info Lines: 0 131 0.0 %
Date: 2013-05-24 Functions: 0 14 0.0 %
Branches: 0 470 0.0 %

           Branch data     Line data    Source code
       1                 :            : /* This software is distributed under BSD 3-clause license (see LICENSE file).
       2                 :            :  *
       3                 :            :  * Copyright (c) 2012-2013 Vladyslav Gorbatiuk
       4                 :            :  */
       5                 :            : 
       6                 :            : #ifndef TAPKEE_MANIFOLD_SCULPTING_H_
       7                 :            : #define TAPKEE_MANIFOLD_SCULPTING_H_
       8                 :            : 
       9                 :            : /* Tapkee includes */
      10                 :            : #include <tapkee/defines.hpp>
      11                 :            : #include <tapkee/utils/sparse.hpp>
      12                 :            : /* End of Tapkee includes */
      13                 :            : 
      14                 :            : #include <math.h>
      15                 :            : #include <ctime>
      16                 :            : #include <cfloat>
      17                 :            : #include <deque>
      18                 :            : #include <set>
      19                 :            : 
      20                 :            : namespace tapkee
      21                 :            : {
      22                 :            : namespace tapkee_internal
      23                 :            : {
      24                 :            : 
      25                 :            : namespace 
      26                 :            : {
      27                 :            :         const ScalarType max_number_of_iterations_without_improvement = 20;
      28                 :            :         const ScalarType multiplier_treshold = 0.01;
      29                 :            :         const ScalarType weight_for_adjusted_point = 10.0;
      30                 :            :         const ScalarType learning_rate_grow_factor = 1.1;
      31                 :            :         const ScalarType learning_rate_shrink_factor = 0.9;
      32                 :            : }
      33                 :            : 
      34                 :            : /** @brief Data needed to compute error function
      35                 :            :  */ 
      36                 :            : struct DataForErrorFunc
      37                 :            : {
      38                 :            :         /** contains distances between point and its neighbors */
      39                 :            :         const SparseMatrix& distance_matrix;
      40                 :            :         /** sparse matrix that contains
      41                 :            :          * original angles between the point, its neighbor and
      42                 :            :          * the most collinear neighbor of the neighbor. If
      43                 :            :          * point's index is P, its neighbor's index is N1 and
      44                 :            :          * the index of neighbor's neighbor is N2, then the
      45                 :            :          * angle between them should be stored at index (P, N2)
      46                 :            :          */
      47                 :            :         const SparseMatrix& angles_matrix;
      48                 :            :         /** a vector of vectors, where I'th
      49                 :            :          * vector contains indices of neighbors for I'th point
      50                 :            :          */
      51                 :            :         const Neighbors& distance_neighbors;
      52                 :            :         /** a vector of vectors,
      53                 :            :          * where the vector at index I contains indices of
      54                 :            :          * neighbor's neighbor of the I'th point (so that
      55                 :            :          * we know, where to search for the angle value)
      56                 :            :          */
      57                 :            :         const Neighbors& angle_neighbors;
      58                 :            :         /** a set of indices of points, that have been 
      59                 :            :          * already adjusted
      60                 :            :          */
      61                 :            :         const std::set<IndexType>& adjusted_points;
      62                 :            :         /** initial average distance between neighbors */
      63                 :            :         const ScalarType average_distance;
      64                 :            : };
      65                 :            : 
      66                 :            : template<class RandomAccessIterator, class DistanceCallback>
      67                 :          0 : SparseMatrix neighbors_distances_matrix(RandomAccessIterator begin, RandomAccessIterator end, 
      68                 :            :                                         const Neighbors& neighbors, DistanceCallback callback,
      69                 :            :                                         ScalarType& average_distance)
      70                 :            : {
      71                 :          0 :         const IndexType k = neighbors[0].size();
      72                 :          0 :         const IndexType n = neighbors.size();
      73   [ #  #  #  #  :          0 :         if ((end-begin)!=n)
             #  #  #  # ]
      74 [ #  # ][ #  # ]:          0 :                 throw std::runtime_error("Wrong size");
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
      75                 :          0 :         SparseTriplets sparse_triplets;
      76   [ #  #  #  #  :          0 :         sparse_triplets.reserve(k*n);
             #  #  #  # ]
      77                 :          0 :         average_distance = 0;
      78                 :            :         ScalarType current_distance;
      79                 :            : 
      80 [ #  # ][ #  # ]:          0 :         for (IndexType i = 0; i < n; ++i)
         [ #  # ][ #  # ]
      81                 :            :         {
      82                 :          0 :                 const LocalNeighbors& current_neighbors = neighbors[i];
      83 [ #  # ][ #  # ]:          0 :                 for (IndexType j = 0; j < k; ++j)
         [ #  # ][ #  # ]
      84                 :            :                 {
      85 [ #  # ][ #  # ]:          0 :                         current_distance = callback.distance(begin[i], begin[current_neighbors[j]]);
                 [ #  # ]
      86                 :          0 :                         average_distance += current_distance;
      87                 :          0 :                         SparseTriplet triplet(i, current_neighbors[j], current_distance);
      88   [ #  #  #  #  :          0 :                         sparse_triplets.push_back(triplet);
             #  #  #  # ]
      89                 :            :                 }
      90                 :            :         }
      91                 :          0 :         average_distance /= (k*n);
      92 [ #  # ][ #  # ]:          0 :         return sparse_matrix_from_triplets(sparse_triplets, n, n);
         [ #  # ][ #  # ]
      93                 :            : }
      94                 :            : 
      95                 :          0 : SparseMatrixNeighborsPair angles_matrix_and_neighbors(const Neighbors& neighbors, 
      96                 :            :                                                       const DenseMatrix& data)
      97                 :            : {
      98                 :          0 :         const IndexType k = neighbors[0].size();
      99                 :          0 :         const IndexType n_vectors = data.cols();
     100                 :            : 
     101                 :          0 :         SparseTriplets sparse_triplets;
     102         [ #  # ]:          0 :         sparse_triplets.reserve(k * n_vectors);
     103                 :            :         /* I tried to find better naming, but... */
     104         [ #  # ]:          0 :         Neighbors most_collinear_neighbors_of_neighbors;
     105         [ #  # ]:          0 :         most_collinear_neighbors_of_neighbors.reserve(n_vectors);
     106                 :            : 
     107         [ #  # ]:          0 :         for (IndexType i = 0; i < n_vectors; ++i)
     108                 :            :         {
     109                 :          0 :                 const LocalNeighbors& current_neighbors = neighbors[i];
     110         [ #  # ]:          0 :                 LocalNeighbors most_collinear_current_neighbors;
     111         [ #  # ]:          0 :                 most_collinear_current_neighbors.reserve(k);
     112                 :            : 
     113         [ #  # ]:          0 :                 for (IndexType j = 0; j < k; ++j)
     114                 :            :                 {
     115                 :          0 :                         const LocalNeighbors& neighbors_of_neighbor = neighbors[current_neighbors[j]];
     116                 :            :                         /* The closer the cos value to -1.0 - the closer the angle to 180.0 */
     117                 :          0 :                         ScalarType min_cos_value = 1.0, current_cos_value;
     118                 :            :                         /* This value will be updated during the seach for most collinear neighbor */
     119         [ #  # ]:          0 :                         most_collinear_current_neighbors.push_back(0);
     120                 :            : 
     121         [ #  # ]:          0 :                         for (IndexType l = 0; l < k; ++l)
     122                 :            :                         {
     123 [ #  # ][ #  # ]:          0 :                                 DenseVector neighbor_to_point = data.col(i) - data.col(current_neighbors[j]);
         [ #  # ][ #  # ]
     124                 :          0 :                                 DenseVector neighbor_to_its_neighbor = data.col(neighbors_of_neighbor[l])
     125   [ #  #  #  # ]:          0 :                                                                                                                 - data.col(current_neighbors[j]);
         [ #  # ][ #  # ]
     126         [ #  # ]:          0 :                                 current_cos_value = neighbor_to_point.dot(neighbor_to_its_neighbor) /
     127         [ #  # ]:          0 :                                                                         (neighbor_to_point.norm() *
     128         [ #  # ]:          0 :                                                                          neighbor_to_its_neighbor.norm());
     129         [ #  # ]:          0 :                                 if (current_cos_value < min_cos_value)
     130                 :            :                                 {
     131                 :          0 :                                         most_collinear_current_neighbors[j] = neighbors_of_neighbor[l];
     132                 :          0 :                                         min_cos_value = current_cos_value;
     133                 :            :                                 }
     134 [ #  # ][ #  # ]:          0 :                         }
     135                 :            : 
     136                 :          0 :                         SparseTriplet triplet(i, most_collinear_current_neighbors[j], min_cos_value);
     137         [ #  # ]:          0 :                         sparse_triplets.push_back(triplet);
     138                 :            :                 }
     139         [ #  # ]:          0 :                 most_collinear_neighbors_of_neighbors.push_back(most_collinear_current_neighbors);
     140         [ #  # ]:          0 :         }
     141                 :            :         return SparseMatrixNeighborsPair
     142                 :            :                 (sparse_matrix_from_triplets(sparse_triplets, n_vectors, n_vectors), 
     143 [ #  # ][ #  # ]:          0 :                  most_collinear_neighbors_of_neighbors);
         [ #  # ][ #  # ]
     144                 :            : }
     145                 :            : 
     146                 :          0 : ScalarType average_neighbor_distance(const DenseMatrix& data, const Neighbors& neighbors)
     147                 :            : {
     148                 :          0 :         IndexType k = neighbors[0].size();
     149                 :          0 :         ScalarType average_distance = 0;
     150                 :            : 
     151         [ #  # ]:          0 :         for (IndexType i = 0; i < data.cols(); ++i)
     152                 :            :         {
     153         [ #  # ]:          0 :                 for (IndexType j = 0; j < k; ++j)
     154                 :            :                 {
     155                 :          0 :                         average_distance += (data.col(i) - data.col(neighbors[i][j])).norm();
     156                 :            :                 }
     157                 :            :         }
     158                 :          0 :         return average_distance / (k * data.cols());
     159                 :            : }
     160                 :            : 
     161                 :          0 : ScalarType compute_error_for_point(const IndexType index, const DenseMatrix& data,
     162                 :            :                                    const DataForErrorFunc& error_func_data)
     163                 :            : {
     164                 :          0 :         IndexType k = error_func_data.distance_neighbors[0].size();
     165                 :          0 :         ScalarType error_value = 0;
     166         [ #  # ]:          0 :         for (IndexType i = 0; i < k; ++i)
     167                 :            :         {
     168                 :            :                 /* Find new angle */
     169                 :          0 :                 IndexType neighbor = error_func_data.distance_neighbors[index][i];
     170                 :          0 :                 IndexType neighbor_of_neighbor = error_func_data.angle_neighbors[index][i];
     171                 :            :                 /* TODO: Extract into a small function, that will find the angle between 3 points */
     172 [ #  # ][ #  # ]:          0 :                 DenseVector neighbor_to_point = data.col(index) - data.col(neighbor);
     173                 :            :                 DenseVector neighbor_to_its_neighbor = data.col(neighbor_of_neighbor)
     174 [ #  # ][ #  # ]:          0 :                                                                                                 - data.col(neighbor);
         [ #  # ][ #  # ]
     175         [ #  # ]:          0 :                 ScalarType current_cos_value = neighbor_to_point.dot(neighbor_to_its_neighbor) /
     176         [ #  # ]:          0 :                                                                         (neighbor_to_point.norm() *
     177         [ #  # ]:          0 :                                                                          neighbor_to_its_neighbor.norm());
     178                 :            :                 /* Find new distance */
     179 [ #  # ][ #  # ]:          0 :                 ScalarType current_distance = (data.col(index) - data.col(neighbor)).norm();
         [ #  # ][ #  # ]
     180                 :            :                 /* Compute one component of error function's value*/
     181                 :            :                 ScalarType diff_cos = 
     182         [ #  # ]:          0 :                         current_cos_value - error_func_data.angles_matrix.coeff(index, neighbor_of_neighbor);
     183         [ #  # ]:          0 :                 if (diff_cos < 0)
     184                 :          0 :                         diff_cos = 0;
     185                 :            :                 ScalarType diff_distance = 
     186         [ #  # ]:          0 :                         current_distance - error_func_data.distance_matrix.coeff(index, neighbor);
     187                 :          0 :                 diff_distance /= error_func_data.average_distance;
     188                 :            :                 /* Weight for adjusted point should be bigger than 1, according to the
     189                 :            :                  * original algorithm
     190                 :            :                  */
     191                 :            :                 ScalarType weight = 
     192 [ #  # ][ #  # ]:          0 :                         (error_func_data.adjusted_points.count(neighbor) == 0) ? 1 : weight_for_adjusted_point;
     193                 :          0 :                 error_value += weight * (diff_cos * diff_cos + diff_distance * diff_distance);
     194         [ #  # ]:          0 :         }
     195                 :          0 :         return error_value;
     196                 :            : }
     197                 :            : 
     198                 :            : /** Adjust the data point at given index to restore
     199                 :            :  * the original relationships between point and its
     200                 :            :  * neighbors. Uses simple hill-climbing technique
     201                 :            :  * @param index index of the point to adjust
     202                 :            :  * @param data the data matrix - will be changed after
     203                 :            :  * the point adjustment (only index column will change)
     204                 :            :  * @param target_dimension - you know, what it is:)
     205                 :            :  * @param learning_rate some small value, that will be
     206                 :            :  * used during hill-climbing to change the points coordinates 
     207                 :            :  * @param error_func_data a special struct, that contains
     208                 :            :  * data, needed for error function calculation - such
     209                 :            :  * as initial distances between neighbors, initial
     210                 :            :  * angles, etc.
     211                 :            :  * @param point_error - will be set to the error function
     212                 :            :  * value, calculated for the point
     213                 :            :  * @return a number of steps it took to  adjust the
     214                 :            :  * point
     215                 :            :  */
     216                 :          0 : IndexType adjust_point_at_index(const IndexType index, DenseMatrix& data, 
     217                 :            :                                 const IndexType target_dimension, 
     218                 :            :                                 const ScalarType learning_rate,
     219                 :            :                                 const DataForErrorFunc& error_func_data,
     220                 :            :                                 ScalarType& point_error)
     221                 :            : {
     222                 :          0 :         IndexType n_steps = 0;
     223                 :            :         ScalarType old_error, new_error;
     224                 :          0 :         bool finish = false;
     225         [ #  # ]:          0 :         while (!finish)
     226                 :            :         {
     227                 :          0 :                 finish = true;
     228                 :          0 :                 old_error = compute_error_for_point(index, data, error_func_data);
     229                 :            : 
     230         [ #  # ]:          0 :                 for (IndexType i = 0; i < target_dimension; ++i)
     231                 :            :                 {
     232                 :            :                         /* Try to change the current coordinate in positive direction */
     233                 :          0 :                         data(i, index) += learning_rate;
     234                 :          0 :                         new_error = compute_error_for_point(index, data, error_func_data);
     235         [ #  # ]:          0 :                         if (new_error >= old_error)
     236                 :            :                         {
     237                 :            :                                 /* Did not help - switching to negative direction */
     238                 :          0 :                                 data(i, index) -= 2 * learning_rate;
     239                 :          0 :                                 new_error = compute_error_for_point(index, data, error_func_data);
     240                 :            : 
     241                 :            :                         }
     242         [ #  # ]:          0 :                         if (new_error >= old_error)
     243                 :            :                                 /* Did not help again - reverting to beginning */
     244                 :          0 :                                 data(i, index) += learning_rate;
     245                 :            :                         else
     246                 :            :                         {
     247                 :            :                                 /* We made some progress (improved an error) */
     248                 :          0 :                                 old_error = new_error;
     249                 :          0 :                                 finish = false;
     250                 :            :                         }
     251                 :            :                 }
     252                 :          0 :                 ++n_steps;
     253                 :            :         }
     254                 :          0 :         point_error = compute_error_for_point(index, data, error_func_data);
     255                 :          0 :         return n_steps;
     256                 :            : }
     257                 :            : 
     258                 :            : template <class RandomAccessIterator, class DistanceCallback>
     259                 :          0 : void manifold_sculpting_embed(RandomAccessIterator begin, RandomAccessIterator end,
     260                 :            :                               DenseMatrix& data, IndexType target_dimension,
     261                 :            :                               const Neighbors& neighbors, DistanceCallback callback, 
     262                 :            :                               IndexType max_iteration, ScalarType squishing_rate)
     263                 :            : {
     264                 :            :         /* Step 1: Get initial distances to each neighbor and initial
     265                 :            :          * angles between the point Pi, each neighbor Nij, and the most
     266                 :            :          * collinear neighbor of Nij.
     267                 :            :          */
     268                 :            :         ScalarType initial_average_distance;
     269                 :            :         SparseMatrix distances_to_neighbors = 
     270                 :          0 :                 neighbors_distances_matrix(begin, end, neighbors, callback, initial_average_distance);
     271                 :            :         SparseMatrixNeighborsPair angles_and_neighbors =
     272   [ #  #  #  #  :          0 :                 angles_matrix_and_neighbors(neighbors, data);
             #  #  #  # ]
     273                 :            : 
     274                 :            :         /* Step 2: Optionally preprocess the data using PCA
     275                 :            :          * (skipped for now).
     276                 :            :          */
     277                 :          0 :         ScalarType no_improvement_counter = 0, normal_counter = 0;
     278                 :          0 :         ScalarType current_multiplier = squishing_rate;
     279                 :          0 :         ScalarType learning_rate = initial_average_distance;
     280                 :          0 :         ScalarType best_error = DBL_MAX, current_error, point_error;
     281                 :            :         /* Step 3: Do until no improvement is made for some period
     282                 :            :          * (or until max_iteration number is reached):
     283                 :            :          */
     284 [ #  # ][ #  # ]:          0 :         while (((no_improvement_counter++ < max_number_of_iterations_without_improvement)
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
     285                 :            :                         || (current_multiplier >  multiplier_treshold))
     286                 :            :                         && (normal_counter++ < max_iteration))
     287                 :            :         {
     288                 :            :                 /* Step 3a: Scale the data in non-preserved dimensions
     289                 :            :                  * by a factor of squishing_rate.
     290                 :            :                  */
     291 [ #  # ][ #  # ]:          0 :                 data.bottomRows(data.rows() - target_dimension) *= squishing_rate;
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
     292 [ #  # ][ #  # ]:          0 :                 while (average_neighbor_distance(data, neighbors) < initial_average_distance)
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
     293                 :            :                 {
     294 [ #  # ][ #  # ]:          0 :                         data.topRows(target_dimension) /= squishing_rate;
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
     295                 :            :                 }
     296                 :          0 :                 current_multiplier *= squishing_rate;
     297                 :            : 
     298                 :            :                 /* Step 3b: Restore the previously computed relationships
     299                 :            :                  * (distances to neighbors and angles to ...) by adjusting
     300                 :            :                  * data points in first target_dimension dimensions.
     301                 :            :                  */
     302                 :            :                 /* Start adjusting from a random point */
     303 [ #  # ][ #  # ]:          0 :                 IndexType start_point_index = std::rand() % data.cols();
         [ #  # ][ #  # ]
     304 [ #  # ][ #  # ]:          0 :                 std::deque<IndexType> points_to_adjust;
         [ #  # ][ #  # ]
     305 [ #  # ][ #  # ]:          0 :                 points_to_adjust.push_back(start_point_index);
         [ #  # ][ #  # ]
     306                 :          0 :                 ScalarType steps_made = 0;
     307                 :          0 :                 current_error = 0;
     308 [ #  # ][ #  # ]:          0 :                 std::set<IndexType> adjusted_points;
         [ #  # ][ #  # ]
     309                 :            : 
     310 [ #  # ][ #  # ]:          0 :                 while (!points_to_adjust.empty())
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
     311                 :            :                 {
     312 [ #  # ][ #  # ]:          0 :                         IndexType current_point_index = points_to_adjust.front();
         [ #  # ][ #  # ]
     313 [ #  # ][ #  # ]:          0 :                         points_to_adjust.pop_front();
         [ #  # ][ #  # ]
     314 [ #  # ][ #  # ]:          0 :                         if (adjusted_points.count(current_point_index) == 0)
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
     315                 :            :                         {
     316                 :            :                         DataForErrorFunc error_func_data = {
     317                 :            :                                         distances_to_neighbors,
     318                 :            :                                         angles_and_neighbors.first,
     319                 :            :                                         neighbors,
     320                 :            :                                         angles_and_neighbors.second,
     321                 :            :                                         adjusted_points,
     322                 :            :                                         initial_average_distance
     323                 :          0 :                                 };
     324 [ #  # ][ #  # ]:          0 :                                 adjust_point_at_index(current_point_index, data, target_dimension,
         [ #  # ][ #  # ]
     325                 :            :                                                                         learning_rate, error_func_data, point_error);
     326                 :          0 :                                 current_error += point_error;
     327                 :            :                                 /* Insert all neighbors into deque */
     328 [ #  # ][ #  # ]:          0 :                                 std::copy(neighbors[current_point_index].begin(), 
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
     329                 :            :                                           neighbors[current_point_index].end(),
     330                 :            :                                           std::back_inserter(points_to_adjust));
     331 [ #  # ][ #  # ]:          0 :                                 adjusted_points.insert(current_point_index);
         [ #  # ][ #  # ]
     332                 :            :                         }
     333                 :            :                 }
     334                 :            : 
     335 [ #  # ][ #  # ]:          0 :                 if (steps_made > data.cols())
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
     336                 :          0 :                         learning_rate *= learning_rate_grow_factor;
     337                 :            :                 else
     338                 :          0 :                         learning_rate *= learning_rate_shrink_factor;
     339                 :            : 
     340 [ #  # ][ #  # ]:          0 :                 if (current_error < best_error)
         [ #  # ][ #  # ]
     341                 :            :                 {
     342                 :          0 :                         best_error = current_error;
     343 [ #  # ][ #  # ]:          0 :                         no_improvement_counter = 0;
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
     344                 :            :                 }
     345                 :            :         }
     346                 :            : 
     347 [ #  # ][ #  # ]:          0 :         data.conservativeResize(target_dimension, Eigen::NoChange);
         [ #  # ][ #  # ]
     348 [ #  # ][ #  # ]:          0 :         data.transposeInPlace();
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
         [ #  # ][ #  # ]
     349                 :          0 : }
     350                 :            : 
     351                 :            : }
     352                 :            : }
     353                 :            : 
     354                 :            : #endif

Generated by: LCOV version 1.9