Tapkee
spe.hpp
Go to the documentation of this file.
00001 /* This software is distributed under BSD 3-clause license (see LICENSE file).
00002  *
00003  * Copyright (c) 2012-2013 Sergey Lisitsyn, Fernando J. Iglesias Garcia
00004  */
00005 
00006 #ifndef TAPKEE_SPE_H_
00007 #define TAPKEE_SPE_H_
00008 
00009 /* Tapkee includes */
00010 #include <tapkee/defines.hpp>
00011 #include <tapkee/utils/time.hpp>
00012 /* End of Tapkee includes */
00013 
00014 namespace tapkee
00015 {
00016 namespace tapkee_internal
00017 {
00018 
00019 template <class RandomAccessIterator, class PairwiseCallback>
00020 DenseMatrix spe_embedding(RandomAccessIterator begin, RandomAccessIterator end,
00021         PairwiseCallback callback, const Neighbors& neighbors,
00022         IndexType target_dimension, bool global_strategy,
00023         ScalarType tolerance, int nupdates, IndexType max_iter)
00024 {
00025     timed_context context("SPE embedding computation");
00026     IndexType k = 0;
00027     if (!global_strategy)
00028         k = neighbors[0].size();
00029 
00030     // The number of data points
00031     int N = end-begin;
00032     while (nupdates > N/2)
00033         nupdates = N/2;
00034 
00035     // Look for the maximum distance
00036     ScalarType max = 0.0;
00037     for (RandomAccessIterator i_iter=begin; i_iter!=end; ++i_iter)
00038     {
00039         for (RandomAccessIterator j_iter=i_iter+1; j_iter!=end; ++j_iter)
00040         {
00041             max = std::max(max, callback.distance(*i_iter,*j_iter));
00042         }
00043     }
00044 
00045     // Distances normalizer used in global strategy
00046     ScalarType alpha = 0.0;
00047     if (global_strategy)
00048         alpha = 1.0 / max * std::sqrt(2.0);
00049 
00050     // Random embedding initialization, Y is the short for embedding_feature_matrix
00051     DenseMatrix Y = (DenseMatrix::Random(target_dimension,N)
00052                + DenseMatrix::Ones(target_dimension,N)) / 2;
00053     // Auxiliary diffference embedding feature matrix
00054     DenseMatrix Yd(target_dimension,nupdates);
00055 
00056     // SPE's main loop
00057     
00058     typedef std::vector<int> Indices;
00059     typedef std::vector<int>::iterator IndexIterator;
00060 
00061     // Maximum number of iterations
00062     if (max_iter == 0)
00063     {
00064         max_iter = 2000 + static_cast<IndexType>(floor(0.04 * N*N));
00065         if (!global_strategy)
00066             max_iter *= 3;
00067     }
00068 
00069     // Learning parameter
00070     ScalarType lambda = 1.0;
00071     // Vector of indices used for shuffling
00072     Indices indices(N);
00073     for (int i=0; i<N; ++i)
00074         indices[i] = i;
00075     // Vector with distances in the original space of the points to update
00076     DenseVector Rt(nupdates);
00077     DenseVector scale(nupdates);
00078     DenseVector D(nupdates);
00079     // Pointers to the indices of the elements to update
00080     IndexIterator ind1;
00081     IndexIterator ind2;
00082     // Helper used in local strategy
00083     Indices ind1Neighbors;
00084     if (!global_strategy)
00085         ind1Neighbors.resize(k*nupdates);
00086 
00087     for (IndexType i=0; i<max_iter; ++i)
00088     {
00089         // Shuffle to select the vectors to update in this iteration
00090         tapkee::random_shuffle(indices.begin(),indices.end());
00091 
00092         ind1 = indices.begin();
00093         ind2 = indices.begin()+nupdates;
00094 
00095         // With local strategy, the seecond set of indices is selected among
00096         // neighbors of the first set
00097         if (!global_strategy)
00098         {
00099             // Neighbors of interest
00100             for(int j=0; j<nupdates; ++j)
00101             {
00102                 const LocalNeighbors& current_neighbors =
00103                     neighbors[*ind1++];
00104 
00105                 for(IndexType kk=0; kk<k; ++kk)
00106                     ind1Neighbors[kk + j*k] = current_neighbors[kk];
00107             }
00108             // Restore ind1
00109             ind1 = indices.begin();
00110 
00111             // Generate pseudo-random indices and select final indices
00112             for(int j=0; j<nupdates; ++j)
00113             {
00114                 IndexType r = static_cast<IndexType>(floor(tapkee::uniform_random()*(k-1)) + k*j);
00115                 indices[nupdates+j] = ind1Neighbors[r];
00116             }
00117         }
00118 
00119 
00120         // Compute distances between the selected points in the embedded space
00121         for(int j=0; j<nupdates; ++j)
00122         {
00123             //FIXME it seems that here Euclidean distance is forced
00124             D[j] = (Y.col(*ind1) - Y.col(*ind2)).norm();
00125             ++ind1, ++ind2;
00126         }
00127 
00128         // Get the corresponding distances in the original space
00129         if (global_strategy)
00130             Rt.fill(alpha);
00131         else // local_strategy
00132             Rt.fill(1);
00133 
00134         ind1 = indices.begin();
00135         ind2 = indices.begin()+nupdates;
00136         for (int j=0; j<nupdates; ++j)
00137             Rt[j] *= callback.distance(*(begin + *ind1++), *(begin + *ind2++));
00138 
00139         // Compute some terms for update
00140 
00141         // Scale factor
00142         D.array() += tolerance;
00143         scale = (Rt-D).cwiseQuotient(D);
00144 
00145         ind1 = indices.begin();
00146         ind2 = indices.begin()+nupdates;
00147         // Difference matrix
00148         for (int j=0; j<nupdates; ++j)
00149         {
00150             Yd.col(j).noalias() = Y.col(*ind1) - Y.col(*ind2);
00151 
00152             ++ind1, ++ind2;
00153         }
00154 
00155         ind1 = indices.begin();
00156         ind2 = indices.begin()+nupdates;
00157         // Update the location of the vectors in the embedded space
00158         for (int j=0; j<nupdates; ++j)
00159         {
00160             Y.col(*ind1) += lambda / 2 * scale[j] * Yd.col(j);
00161             Y.col(*ind2) -= lambda / 2 * scale[j] * Yd.col(j);
00162 
00163             ++ind1, ++ind2;
00164         }
00165 
00166         // Update the learning parameter
00167         lambda = lambda - ( lambda / max_iter );
00168     }
00169 
00170     return Y.transpose();
00171 }
00172 
00173 } // End of namespace tapkee_internal
00174 } // End of namespace tapkee
00175 
00176 #endif /* TAPKEE_SPE_H_ */
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines