Tapkee
manifold_sculpting.hpp
Go to the documentation of this file.
00001 /* This software is distributed under BSD 3-clause license (see LICENSE file).
00002  *
00003  * Copyright (c) 2012-2013 Vladyslav Gorbatiuk
00004  */
00005 
00006 #ifndef TAPKEE_MANIFOLD_SCULPTING_H_
00007 #define TAPKEE_MANIFOLD_SCULPTING_H_
00008 
00009 /* Tapkee includes */
00010 #include <tapkee/defines.hpp>
00011 #include <tapkee/utils/sparse.hpp>
00012 /* End of Tapkee includes */
00013 
00014 #include <math.h>
00015 #include <ctime>
00016 #include <cfloat>
00017 #include <deque>
00018 #include <set>
00019 
00020 namespace tapkee
00021 {
00022 namespace tapkee_internal
00023 {
00024 
00025 namespace 
00026 {
00027     const ScalarType max_number_of_iterations_without_improvement = 20;
00028     const ScalarType multiplier_treshold = 0.01;
00029     const ScalarType weight_for_adjusted_point = 10.0;
00030     const ScalarType learning_rate_grow_factor = 1.1;
00031     const ScalarType learning_rate_shrink_factor = 0.9;
00032 }
00033 
00036 struct DataForErrorFunc
00037 {
00039     const SparseMatrix& distance_matrix;
00047     const SparseMatrix& angles_matrix;
00051     const Neighbors& distance_neighbors;
00057     const Neighbors& angle_neighbors;
00061     const std::set<IndexType>& adjusted_points;
00063     const ScalarType average_distance;
00064 };
00065 
00066 template<class RandomAccessIterator, class DistanceCallback>
00067 SparseMatrix neighbors_distances_matrix(RandomAccessIterator begin, RandomAccessIterator end, 
00068                                         const Neighbors& neighbors, DistanceCallback callback,
00069                                         ScalarType& average_distance)
00070 {
00071     const IndexType k = neighbors[0].size();
00072     const IndexType n = neighbors.size();
00073     if ((end-begin)!=n)
00074         throw std::runtime_error("Wrong size");
00075     SparseTriplets sparse_triplets;
00076     sparse_triplets.reserve(k*n);
00077     average_distance = 0;
00078     ScalarType current_distance;
00079 
00080     for (IndexType i = 0; i < n; ++i)
00081     {
00082         const LocalNeighbors& current_neighbors = neighbors[i];
00083         for (IndexType j = 0; j < k; ++j)
00084         {
00085             current_distance = callback.distance(begin[i], begin[current_neighbors[j]]);
00086             average_distance += current_distance;
00087             SparseTriplet triplet(i, current_neighbors[j], current_distance);
00088             sparse_triplets.push_back(triplet);
00089         }
00090     }
00091     average_distance /= (k*n);
00092     return sparse_matrix_from_triplets(sparse_triplets, n, n);
00093 }
00094 
00095 SparseMatrixNeighborsPair angles_matrix_and_neighbors(const Neighbors& neighbors, 
00096                                                       const DenseMatrix& data)
00097 {
00098     const IndexType k = neighbors[0].size();
00099     const IndexType n_vectors = data.cols();
00100 
00101     SparseTriplets sparse_triplets;
00102     sparse_triplets.reserve(k * n_vectors);
00103     /* I tried to find better naming, but... */
00104     Neighbors most_collinear_neighbors_of_neighbors;
00105     most_collinear_neighbors_of_neighbors.reserve(n_vectors);
00106 
00107     for (IndexType i = 0; i < n_vectors; ++i)
00108     {
00109         const LocalNeighbors& current_neighbors = neighbors[i];
00110         LocalNeighbors most_collinear_current_neighbors;
00111         most_collinear_current_neighbors.reserve(k);
00112 
00113         for (IndexType j = 0; j < k; ++j)
00114         {
00115             const LocalNeighbors& neighbors_of_neighbor = neighbors[current_neighbors[j]];
00116             /* The closer the cos value to -1.0 - the closer the angle to 180.0 */
00117             ScalarType min_cos_value = 1.0, current_cos_value;
00118             /* This value will be updated during the seach for most collinear neighbor */
00119             most_collinear_current_neighbors.push_back(0);
00120 
00121             for (IndexType l = 0; l < k; ++l)
00122             {
00123                 DenseVector neighbor_to_point = data.col(i) - data.col(current_neighbors[j]);
00124                 DenseVector neighbor_to_its_neighbor = data.col(neighbors_of_neighbor[l])
00125                                                         - data.col(current_neighbors[j]);
00126                 current_cos_value = neighbor_to_point.dot(neighbor_to_its_neighbor) /
00127                                     (neighbor_to_point.norm() *
00128                                      neighbor_to_its_neighbor.norm());
00129                 if (current_cos_value < min_cos_value)
00130                 {
00131                     most_collinear_current_neighbors[j] = neighbors_of_neighbor[l];
00132                     min_cos_value = current_cos_value;
00133                 }
00134             }
00135 
00136             SparseTriplet triplet(i, most_collinear_current_neighbors[j], min_cos_value);
00137             sparse_triplets.push_back(triplet);
00138         }
00139         most_collinear_neighbors_of_neighbors.push_back(most_collinear_current_neighbors);
00140     }
00141     return SparseMatrixNeighborsPair
00142         (sparse_matrix_from_triplets(sparse_triplets, n_vectors, n_vectors), 
00143          most_collinear_neighbors_of_neighbors);
00144 }
00145 
00146 ScalarType average_neighbor_distance(const DenseMatrix& data, const Neighbors& neighbors)
00147 {
00148     IndexType k = neighbors[0].size();
00149     ScalarType average_distance = 0;
00150 
00151     for (IndexType i = 0; i < data.cols(); ++i)
00152     {
00153         for (IndexType j = 0; j < k; ++j)
00154         {
00155             average_distance += (data.col(i) - data.col(neighbors[i][j])).norm();
00156         }
00157     }
00158     return average_distance / (k * data.cols());
00159 }
00160 
00161 ScalarType compute_error_for_point(const IndexType index, const DenseMatrix& data,
00162                                    const DataForErrorFunc& error_func_data)
00163 {
00164     IndexType k = error_func_data.distance_neighbors[0].size();
00165     ScalarType error_value = 0;
00166     for (IndexType i = 0; i < k; ++i)
00167     {
00168         /* Find new angle */
00169         IndexType neighbor = error_func_data.distance_neighbors[index][i];
00170         IndexType neighbor_of_neighbor = error_func_data.angle_neighbors[index][i];
00171         /* TODO: Extract into a small function, that will find the angle between 3 points */
00172         DenseVector neighbor_to_point = data.col(index) - data.col(neighbor);
00173         DenseVector neighbor_to_its_neighbor = data.col(neighbor_of_neighbor)
00174                                                 - data.col(neighbor);
00175         ScalarType current_cos_value = neighbor_to_point.dot(neighbor_to_its_neighbor) /
00176                                     (neighbor_to_point.norm() *
00177                                      neighbor_to_its_neighbor.norm());
00178         /* Find new distance */
00179         ScalarType current_distance = (data.col(index) - data.col(neighbor)).norm();
00180         /* Compute one component of error function's value*/
00181         ScalarType diff_cos = 
00182             current_cos_value - error_func_data.angles_matrix.coeff(index, neighbor_of_neighbor);
00183         if (diff_cos < 0)
00184             diff_cos = 0;
00185         ScalarType diff_distance = 
00186             current_distance - error_func_data.distance_matrix.coeff(index, neighbor);
00187         diff_distance /= error_func_data.average_distance;
00188         /* Weight for adjusted point should be bigger than 1, according to the
00189          * original algorithm
00190          */
00191         ScalarType weight = 
00192             (error_func_data.adjusted_points.count(neighbor) == 0) ? 1 : weight_for_adjusted_point;
00193         error_value += weight * (diff_cos * diff_cos + diff_distance * diff_distance);
00194     }
00195     return error_value;
00196 }
00197 
00216 IndexType adjust_point_at_index(const IndexType index, DenseMatrix& data, 
00217                                 const IndexType target_dimension, 
00218                                 const ScalarType learning_rate,
00219                                 const DataForErrorFunc& error_func_data,
00220                                 ScalarType& point_error)
00221 {
00222     IndexType n_steps = 0;
00223     ScalarType old_error, new_error;
00224     bool finish = false;
00225     while (!finish)
00226     {
00227         finish = true;
00228         old_error = compute_error_for_point(index, data, error_func_data);
00229 
00230         for (IndexType i = 0; i < target_dimension; ++i)
00231         {
00232             /* Try to change the current coordinate in positive direction */
00233             data(i, index) += learning_rate;
00234             new_error = compute_error_for_point(index, data, error_func_data);
00235             if (new_error >= old_error)
00236             {
00237                 /* Did not help - switching to negative direction */
00238                 data(i, index) -= 2 * learning_rate;
00239                 new_error = compute_error_for_point(index, data, error_func_data);
00240 
00241             }
00242             if (new_error >= old_error)
00243                 /* Did not help again - reverting to beginning */
00244                 data(i, index) += learning_rate;
00245             else
00246             {
00247                 /* We made some progress (improved an error) */
00248                 old_error = new_error;
00249                 finish = false;
00250             }
00251         }
00252         ++n_steps;
00253     }
00254     point_error = compute_error_for_point(index, data, error_func_data);
00255     return n_steps;
00256 }
00257 
00258 template <class RandomAccessIterator, class DistanceCallback>
00259 void manifold_sculpting_embed(RandomAccessIterator begin, RandomAccessIterator end,
00260                               DenseMatrix& data, IndexType target_dimension,
00261                               const Neighbors& neighbors, DistanceCallback callback, 
00262                               IndexType max_iteration, ScalarType squishing_rate)
00263 {
00264     /* Step 1: Get initial distances to each neighbor and initial
00265      * angles between the point Pi, each neighbor Nij, and the most
00266      * collinear neighbor of Nij.
00267      */
00268     ScalarType initial_average_distance;
00269     SparseMatrix distances_to_neighbors = 
00270         neighbors_distances_matrix(begin, end, neighbors, callback, initial_average_distance);
00271     SparseMatrixNeighborsPair angles_and_neighbors =
00272         angles_matrix_and_neighbors(neighbors, data);
00273 
00274     /* Step 2: Optionally preprocess the data using PCA
00275      * (skipped for now).
00276      */
00277     ScalarType no_improvement_counter = 0, normal_counter = 0;
00278     ScalarType current_multiplier = squishing_rate;
00279     ScalarType learning_rate = initial_average_distance;
00280     ScalarType best_error = DBL_MAX, current_error, point_error;
00281     /* Step 3: Do until no improvement is made for some period
00282      * (or until max_iteration number is reached):
00283      */
00284     while (((no_improvement_counter++ < max_number_of_iterations_without_improvement)
00285             || (current_multiplier >  multiplier_treshold))
00286             && (normal_counter++ < max_iteration))
00287     {
00288         /* Step 3a: Scale the data in non-preserved dimensions
00289          * by a factor of squishing_rate.
00290          */
00291         data.bottomRows(data.rows() - target_dimension) *= squishing_rate;
00292         while (average_neighbor_distance(data, neighbors) < initial_average_distance)
00293         {
00294             data.topRows(target_dimension) /= squishing_rate;
00295         }
00296         current_multiplier *= squishing_rate;
00297 
00298         /* Step 3b: Restore the previously computed relationships
00299          * (distances to neighbors and angles to ...) by adjusting
00300          * data points in first target_dimension dimensions.
00301          */
00302         /* Start adjusting from a random point */
00303         IndexType start_point_index = std::rand() % data.cols();
00304         std::deque<IndexType> points_to_adjust;
00305         points_to_adjust.push_back(start_point_index);
00306         ScalarType steps_made = 0;
00307         current_error = 0;
00308         std::set<IndexType> adjusted_points;
00309 
00310         while (!points_to_adjust.empty())
00311         {
00312             IndexType current_point_index = points_to_adjust.front();
00313             points_to_adjust.pop_front();
00314             if (adjusted_points.count(current_point_index) == 0)
00315             {
00316             DataForErrorFunc error_func_data = {
00317                     distances_to_neighbors,
00318                     angles_and_neighbors.first,
00319                     neighbors,
00320                     angles_and_neighbors.second,
00321                     adjusted_points,
00322                     initial_average_distance
00323                 };
00324                 adjust_point_at_index(current_point_index, data, target_dimension,
00325                                     learning_rate, error_func_data, point_error);
00326                 current_error += point_error;
00327                 /* Insert all neighbors into deque */
00328                 std::copy(neighbors[current_point_index].begin(), 
00329                           neighbors[current_point_index].end(),
00330                           std::back_inserter(points_to_adjust));
00331                 adjusted_points.insert(current_point_index);
00332             }
00333         }
00334 
00335         if (steps_made > data.cols())
00336             learning_rate *= learning_rate_grow_factor;
00337         else
00338             learning_rate *= learning_rate_shrink_factor;
00339 
00340         if (current_error < best_error)
00341         {
00342             best_error = current_error;
00343             no_improvement_counter = 0;
00344         }
00345     }
00346 
00347     data.conservativeResize(target_dimension, Eigen::NoChange);
00348     data.transposeInPlace();
00349 }
00350 
00351 }
00352 }
00353 
00354 #endif
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines