Specialize TensorFlow classifier for GPU processing

Simon Giraudot 2018-10-11 13:11:44 +02:00
parent 4bebcae4f2
commit 58a6c4cc2f
1 changed file with 137 additions and 1 deletion


@@ -106,6 +106,10 @@ class TensorFlow_neural_network_classifier
public:
  /// \cond SKIP_IN_MANUAL
  const Feature_set& features() const { return m_features; }
  /// \endcond
  /// \name Constructor
  /// @{
@@ -418,6 +422,34 @@ public:
    for (std::size_t i = 0; i < m_labels.size(); ++ i)
      out[i] = output_data[i];
  }
  void operator() (const std::vector<std::size_t>& item_indices,
                   std::vector<std::vector<float> >& out) const
  {
    out.resize (item_indices.size(), std::vector<float>(m_labels.size(), 0.));

    // Batched input tensor: one row per item, one column per feature
    TF::Tensor ft
      (TF::DataTypeToEnum<float>::v(),
       TF::TensorShape {(long long)(item_indices.size()), (long long)(m_features.size())});
    float* ft_data = ft.flat<float>().data();

    // Fill input tensor with normalized feature values
    for (std::size_t i = 0; i < item_indices.size(); ++ i)
      for (std::size_t f = 0; f < m_features.size(); ++ f)
        ft_data[i * m_features.size() + f]
          = (m_features[f]->value(item_indices[i]) - m_feature_means[f]) / m_feature_sd[f];

    // Evaluate the network on the whole batch in a single session run
    std::vector<TF::Tensor> outputs;
    TF_CHECK_OK(m_session->Run({{*m_ph_ft, ft}}, {m_layers.back()}, &outputs));
    float* output_data = outputs[0].flat<float>().data();

    // Copy the per-item, per-label probabilities back to the caller
    for (std::size_t i = 0; i < item_indices.size(); ++ i)
      for (std::size_t l = 0; l < m_labels.size(); ++ l)
        out[i][l] = output_data[i * m_labels.size() + l];
  }
  /// \endcond

  /// @}
@@ -976,8 +1008,8 @@ private:
    // options.config.mutable_gpu_options()->set_operation_timeout_in_ms(15000);
    // options.config.mutable_gpu_options()->set_visible_device_list("");
    options.config.mutable_gpu_options()->set_per_process_gpu_memory_fraction(0.3);
    // Let the GPU allocator grow on demand, up to 80% of the device memory
    options.config.mutable_gpu_options()->set_allow_growth(true);
    options.config.mutable_gpu_options()->set_per_process_gpu_memory_fraction(0.8);
    // Allocate host tensors in CUDA-pinned memory to speed up host/device transfers
    options.config.mutable_gpu_options()->set_force_gpu_compatible(true);

    m_session = new TF::ClientSession (*m_root, options);
@@ -1004,6 +1036,110 @@ private:
};
#if 1

// Specialization to use GPU parallelization (labels + probabilities output)
template <typename ConcurrencyTag,
          typename ItemRange,
          typename LabelIndexRange,
          typename ProbabilitiesRanges>
void classify (const ItemRange& input,
               const Label_set& labels,
               const TensorFlow_neural_network_classifier& classifier,
               LabelIndexRange& output,
               ProbabilitiesRanges& probabilities)
{
  std::cerr << "Classify with TensorFlow classifier" << std::endl;
  output.resize(input.size());
  probabilities.resize (labels.size());
  for (std::size_t i = 0; i < probabilities.size(); ++ i)
    probabilities[i].resize (input.size());

  // Split the input into batches so that the input/output tensors stay
  // below roughly 1 GB of GPU memory
  const std::size_t mem_allocated = sizeof(float) * input.size() * (labels.size() + classifier.features().size());
  const std::size_t size_max = 1024 * 1024 * 1024;
  const std::size_t nb_subdivisions = (mem_allocated / size_max) + 1;
  std::cerr << nb_subdivisions << " subdivision(s) for GPU processing" << std::endl;

  std::size_t idx = 0;
  for (std::size_t n = 0; n < nb_subdivisions; ++ n)
  {
    // The last subdivision also absorbs the remainder when input.size()
    // is not a multiple of nb_subdivisions
    const std::size_t chunk_size = (n + 1 == nb_subdivisions
                                    ? input.size() - idx
                                    : input.size() / nb_subdivisions);
    std::vector<std::size_t> indices;
    indices.reserve (chunk_size);
    for (std::size_t i = 0; i < chunk_size && idx < input.size(); ++ i)
      indices.push_back(idx ++);

    std::vector<std::vector<float> > values;
    classifier (indices, values);

    // Store the probabilities and keep, for each item, the label with highest score
    for(std::size_t i = 0; i < indices.size(); ++ i)
    {
      std::size_t nb_class_best = 0;
      float val_class_best = 0.f;
      for (std::size_t j = 0; j < labels.size(); ++ j)
      {
        probabilities[j][indices[i]] = values[i][j];
        if(val_class_best < values[i][j])
        {
          val_class_best = values[i][j];
          nb_class_best = j;
        }
      }
      output[indices[i]] = nb_class_best;
    }
  }
}
// Specialization to use GPU parallelization (labels output only)
template <typename ConcurrencyTag,
          typename ItemRange,
          typename LabelIndexRange>
void classify (const ItemRange& input,
               const Label_set& labels,
               const TensorFlow_neural_network_classifier& classifier,
               LabelIndexRange& output)
{
  std::cerr << "Classify with TensorFlow classifier" << std::endl;
  output.resize(input.size());

  // Split the input into batches so that the input/output tensors stay
  // below roughly 1 GB of GPU memory
  const std::size_t mem_allocated = sizeof(float) * input.size() * (labels.size() + classifier.features().size());
  const std::size_t size_max = 1024 * 1024 * 1024;
  const std::size_t nb_subdivisions = (mem_allocated / size_max) + 1;
  std::cerr << nb_subdivisions << " subdivision(s) for GPU processing" << std::endl;

  std::size_t idx = 0;
  for (std::size_t n = 0; n < nb_subdivisions; ++ n)
  {
    // The last subdivision also absorbs the remainder when input.size()
    // is not a multiple of nb_subdivisions
    const std::size_t chunk_size = (n + 1 == nb_subdivisions
                                    ? input.size() - idx
                                    : input.size() / nb_subdivisions);
    std::vector<std::size_t> indices;
    indices.reserve (chunk_size);
    for (std::size_t i = 0; i < chunk_size && idx < input.size(); ++ i)
      indices.push_back(idx ++);

    std::vector<std::vector<float> > values;
    classifier (indices, values);

    // Keep, for each item, the label with highest score
    for(std::size_t i = 0; i < indices.size(); ++ i)
    {
      std::size_t nb_class_best = 0;
      float val_class_best = 0.f;
      for (std::size_t j = 0; j < labels.size(); ++ j)
      {
        if(val_class_best < values[i][j])
        {
          val_class_best = values[i][j];
          nb_class_best = j;
        }
      }
      output[indices[i]] = nb_class_best;
    }
  }
}
#endif

} // namespace Classification
} // namespace CGAL
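
For context, a minimal usage sketch of how the batched overloads above would be reached from user code. Only the classify() signatures come from this commit; the include paths, the CGAL::Parallel_tag concurrency tag, the namespace alias and the container types chosen for the output ranges are assumptions, and the classifier is assumed to be already constructed and trained.

// Usage sketch only; not part of the commit.
#include <CGAL/Classification/Label_set.h>
#include <CGAL/tags.h>
#include <vector>

namespace Classification = CGAL::Classification;

template <typename PointRange>
void label_points (const PointRange& points,   // item range over which the classifier's features were computed
                   const Classification::Label_set& labels,
                   const Classification::TensorFlow_neural_network_classifier& classifier)
{
  std::vector<std::size_t> label_indices;          // filled with the best label index per item
  std::vector<std::vector<float> > probabilities;  // filled with one probability range per label

  // Resolves to the GPU overload added by this commit: the input is split into
  // roughly 1 GB batches and each batch is evaluated in one TensorFlow session run.
  Classification::classify<CGAL::Parallel_tag>
    (points, labels, classifier, label_indices, probabilities);
}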