From 69272e4d9af732529249ba1ac8fef4ebdf1f133a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20Jamin?= <clement.jamin@inria.fr>
Date: Tue, 10 Apr 2012 13:23:51 +0000
Subject: [PATCH] Test with a worksharing strategy (based on TBB task
 scheduler) + test with parallel_do.

---
 .gitattributes                                |   1 +
 Mesh_2/include/CGAL/Mesher_level.h            | 280 ++++++++++++---
 Mesh_3/benchmark/Mesh_3/concurrency.cpp       |  59 +++-
 Mesh_3/demo/Mesh_3/Mesh_3_plugin.cpp          |  34 +-
 Mesh_3/demo/Mesh_3/Mesh_function.h            |   8 +
 Mesh_3/demo/Mesh_3/Scene_c3t3_item.cpp        |  25 +-
 Mesh_3/demo/Mesh_3/config.h                   |  28 +-
 .../CGAL/Mesh_3/Locking_data_structures.h     |   4 +-
 .../CGAL/Mesh_3/Worksharing_data_structures.h | 322 ++++++++++++++++++
 .../CGAL/Concurrent_compact_container.h       |   3 +-
 .../include/CGAL/Triangulation_3.h            |  34 +-
 .../CGAL/Triangulation_ds_vertex_base_3.h     |  33 +-
 12 files changed, 745 insertions(+), 86 deletions(-)
 create mode 100644 Mesh_3/include/CGAL/Mesh_3/Worksharing_data_structures.h
diff --git a/.gitattributes b/.gitattributes
index c9c544a15e5..d76de42573e 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -2709,6 +2709,7 @@ Mesh_3/examples/Mesh_3/old_stuff/inputs/cube.mesh -text svneol=unset#application
 Mesh_3/examples/Mesh_3/old_stuff/inputs/tangle.mesh -text svneol=unset#application/octet-stream
 Mesh_3/include/CGAL/Mesh_3/Locking_data_structures.h -text
 Mesh_3/include/CGAL/Mesh_3/Profiling_tools.h -text
+Mesh_3/include/CGAL/Mesh_3/Worksharing_data_structures.h -text
 Mesh_3/include/CGAL/Meshes/Filtered_multimap_container.h -text
 Mesh_3/include/CGAL/Triangulation_lazy_ds_cell_base_3.h -text
 Mesh_3/package_info/Mesh_3/description.txt -text
diff --git a/Mesh_2/include/CGAL/Mesher_level.h b/Mesh_2/include/CGAL/Mesher_level.h
index af44636a10a..710ad4da9cf 100644
--- a/Mesh_2/include/CGAL/Mesher_level.h
+++ b/Mesh_2/include/CGAL/Mesher_level.h
@@ -26,24 +26,28 @@
 #endif
 
 #ifdef CONCURRENT_MESH_3
-  #include <algorithm>
+# include <algorithm>
 
-  #include <tbb/tbb.h>
+# include <tbb/tbb.h>
 
-  #include <CGAL/hilbert_sort.h> //CJTODO: remove?
-  #include <CGAL/spatial_sort.h> //CJTODO: remove?
-  #include <CGAL/Mesh_3/Locking_data_structures.h> // CJODO TEMP?
-  #include <CGAL/BBox_3.h>
-  
-  #ifdef CGAL_CONCURRENT_MESH_3_PROFILING
-    #define CGAL_PROFILE
-    #include <CGAL/Profile_counter.h>
-  #endif
+# include <CGAL/hilbert_sort.h> //CJTODO: remove?
+# include <CGAL/spatial_sort.h> //CJTODO: remove?
+# include <CGAL/Mesh_3/Locking_data_structures.h> // CJODO TEMP?
+# ifdef CGAL_MESH_3_WORKSHARING_USES_TASKS
+#   include <CGAL/Mesh_3/Worksharing_data_structures.h>
+#   include <tbb/task.h>
+# endif
+# include <CGAL/BBox_3.h>
+   
+# ifdef CGAL_CONCURRENT_MESH_3_PROFILING
+#   define CGAL_PROFILE
+#   include <CGAL/Profile_counter.h>
+# endif
   
   // CJTODO TEMP TEST
-#ifdef CGAL_MESH_3_DO_NOT_LOCK_INFINITE_VERTEX
+# ifdef CGAL_MESH_3_DO_NOT_LOCK_INFINITE_VERTEX
   extern bool g_is_set_cell_active;
-#endif
+# endif
 
   // CJTODO TEMP: not thread-safe => move it to Mesher_3
   extern CGAL::Bbox_3 g_bbox;
@@ -545,6 +549,11 @@ public:
     typedef typename Derived::Container::Element Container_element;
     typedef typename Derived::Container::Quality Container_quality;
 
+  //=======================================================
+  //================= PARALLEL_FOR?
+  //=======================================================
+
+# ifdef CGAL_MESH_3_WORKSHARING_USES_PARALLEL_FOR
     /*std::pair<Container_quality, Container_element>
       raw_elements[ELEMENT_BATCH_SIZE];*/
     std::vector<Container_element> container_elements;
@@ -573,9 +582,9 @@ public:
       indices.push_back(iElt);
     }
 
-# ifdef CGAL_CONCURRENT_MESH_3_VERBOSE
+#   ifdef CGAL_CONCURRENT_MESH_3_VERBOSE
     std::cerr << "Refining a batch of " << iElt << " elements...";
-# endif
+#   endif
     
     // Doesn't help much
     //typedef Spatial_sort_traits_adapter_3<Tr::Geom_traits, Point*> Search_traits;
@@ -599,8 +608,6 @@ public:
         {
           for( size_t i = r.begin() ; i != r.end() ; )
           {
-            before_next_element_refinement(visitor);
-
             std::ptrdiff_t index = indices[i];
             Container_element ce = container_elements[index];
 
@@ -616,7 +623,6 @@ public:
                 break;
 
               case COULD_NOT_LOCK_ZONE:
-              case COULD_NOT_LOCK_ELEMENT:
               {
                 // Swap indices[i] and indices[i+1]
                 if (i+1 != r.end())
@@ -634,11 +640,15 @@ public:
                 break;
               }
               
+              case COULD_NOT_LOCK_ELEMENT:
+                // We retry it now
               case THE_FACET_TO_REFINE_IS_NOT_IN_ITS_CONFLICT_ZONE:
                 // We retry it since we switched to exact computation
                 // for the adjacent cells circumcenters
                 break;
             }
+            
+            before_next_element_refinement(visitor);
           }
         }
       );
@@ -681,9 +691,195 @@ public:
       }
     }
 
-# ifdef CGAL_CONCURRENT_MESH_3_VERBOSE
-      std::cerr << " batch done." << std::endl;
-# endif
+#   ifdef CGAL_CONCURRENT_MESH_3_VERBOSE
+    std::cerr << " batch done." << std::endl;
+#   endif
+      
+  //=======================================================
+  //================= PARALLEL_DO?
+  //=======================================================
+
+# elif defined(CGAL_MESH_3_WORKSHARING_USES_PARALLEL_DO)
+    std::vector<Container_element> container_elements;
+    container_elements.reserve(ELEMENT_BATCH_SIZE);
+    
+    while(!no_longer_element_to_refine())
+    {
+      Container_element ce = derived().get_next_raw_element_impl().second;
+      pop_next_element();
+      container_elements.push_back(ce);
+    }
+
+#   ifdef CGAL_CONCURRENT_MESH_3_VERBOSE
+    std::cerr << "Refining elements in parallel...";
+#   endif
+    
+    // CJTODO: lambda functions OK?
+    
+    //g_is_set_cell_active = false;
+    previous_level.add_to_TLS_lists(true);
+    add_to_TLS_lists(true);
+    tbb::parallel_do(
+      container_elements.begin(), container_elements.end(),
+      [&] (Container_element& ce, tbb::parallel_do_feeder<Container_element>& feeder)
+      {
+        Mesher_level_conflict_status status;
+        do 
+        {
+          status = try_lock_and_refine_element(ce, visitor);
+        }
+        while (status == COULD_NOT_LOCK_ELEMENT 
+          || status == THE_FACET_TO_REFINE_IS_NOT_IN_ITS_CONFLICT_ZONE);
+
+        switch (status)
+        {
+          case NO_CONFLICT:
+          case CONFLICT_AND_ELEMENT_SHOULD_BE_DROPPED:
+          case ELEMENT_WAS_A_ZOMBIE:
+            break;
+
+          case COULD_NOT_LOCK_ZONE:
+          {
+            feeder.add(ce);
+            break;
+          }
+              
+          /*case COULD_NOT_LOCK_ELEMENT:
+            // We retry it now
+          case THE_FACET_TO_REFINE_IS_NOT_IN_ITS_CONFLICT_ZONE:
+            // We retry it since we switched to exact computation
+            // for the adjacent cells circumcenters
+            break;*/
+        }
+        
+        before_next_element_refinement(visitor); 
+
+        // Finally we add the new local bad_elements to the feeder
+        while (no_longer_local_element_to_refine() == false)
+        {
+          typedef typename Derived::Container::Element Container_element;
+          Container_element ce = derived().get_next_local_raw_element_impl().second;
+          pop_next_local_element();
+
+          feeder.add(ce);
+        } 
+      }
+    );
+    splice_local_lists();
+    CGAL_assertion(no_longer_element_to_refine());
+    //previous_level.splice_local_lists(); // useless
+    previous_level.add_to_TLS_lists(false);
+    add_to_TLS_lists(false);
+    //g_is_set_cell_active = true;
+    
+
+#   ifdef CGAL_CONCURRENT_MESH_3_VERBOSE
+    std::cerr << " done." << std::endl;
+#   endif
+  //=======================================================
+  //================= TASKS?
+  //=======================================================
+
+# elif defined(CGAL_MESH_3_WORKSHARING_USES_TASKS)
+
+    std::vector<Container_element> container_elements;
+    container_elements.reserve(ELEMENT_BATCH_SIZE);
+    
+    int iElt = 0;
+    for( ; 
+          iElt < ELEMENT_BATCH_SIZE && !no_longer_element_to_refine() ; 
+          ++iElt )
+    {
+      Container_element ce = derived().get_next_raw_element_impl().second;
+      pop_next_element();
+      container_elements.push_back(ce);
+    }
+    
+#   ifdef CGAL_CONCURRENT_MESH_3_VERBOSE
+    std::cerr << "Refining a batch of " << iElt << " elements...";
+#   endif
+    
+    // CJTODO: lambda functions OK?
+    if (iElt > 20)
+    {
+      //g_is_set_cell_active = false;
+      previous_level.add_to_TLS_lists(true);
+      add_to_TLS_lists(true);
+      
+      tbb::task& empty_root_task = *new( tbb::task::allocate_root() ) tbb::empty_task;
+      empty_root_task.set_ref_count(iElt + 1);
+
+      for( size_t i = 0 ; i < iElt ; ++i)
+      {
+        Container_element ce = container_elements[i];
+        
+        Mesh_3::enqueue_work(
+          [&, ce, visitor]()
+          {
+            Mesher_level_conflict_status status;
+            do
+            {
+              status = try_lock_and_refine_element(ce, visitor);
+              before_next_element_refinement(visitor);
+            }
+            while (status != NO_CONFLICT
+              && status != CONFLICT_AND_ELEMENT_SHOULD_BE_DROPPED
+              && status != ELEMENT_WAS_A_ZOMBIE);
+          },
+          empty_root_task,
+          circumcenter(derived().extract_element_from_container_value(ce)));
+      }
+      empty_root_task.wait_for_all();
+      tbb::task::destroy(empty_root_task);
+
+      splice_local_lists();
+      //previous_level.splice_local_lists(); // useless
+      previous_level.add_to_TLS_lists(false);
+      add_to_TLS_lists(false);
+      //g_is_set_cell_active = true;
+    }
+    // Go sequential
+    else
+    {
+      for (int i = 0 ; i < iElt ; )
+      {
+        std::ptrdiff_t index = i;
+
+        Derived &derivd = derived();
+        //Container_element ce = raw_elements[index].second;
+        Container_element ce = container_elements[index];
+        if( !derivd.is_zombie(ce) )
+        {
+          // Lock the element area on the grid
+          Element element = derivd.extract_element_from_container_value(ce);
+          
+          const Mesher_level_conflict_status result 
+            = try_to_refine_element(element, visitor);
+
+          if (result != CONFLICT_BUT_ELEMENT_CAN_BE_RECONSIDERED
+            && result != THE_FACET_TO_REFINE_IS_NOT_IN_ITS_CONFLICT_ZONE)
+          {
+            ++i;
+          }
+        }
+        else
+        {
+          ++i;
+        }
+        // Unlock
+        unlock_all_thread_local_elements();
+      }
+    }
+
+#   ifdef CGAL_CONCURRENT_MESH_3_VERBOSE
+    std::cerr << " batch done." << std::endl;
+#   endif
+
+#endif
+  //=======================================================
+  //================= / WORKSHARING STRATEGY
+  //=======================================================
+
   }
 
   /** 
@@ -712,36 +908,37 @@ public:
     std::cerr << "Trying to insert point: " << p << std::endl;
 #endif
     
+    
+//=========================================
 //==== Simple Grid locking
+//=========================================
 #if defined(CGAL_MESH_3_CONCURRENT_REFINEMENT) && \
     defined(CGAL_MESH_3_LOCKING_STRATEGY_SIMPLE_GRID_LOCKING)
 
     Mesher_level_conflict_status result;
     Zone zone;
-    if( g_lock_grid.try_lock(p).first )
-    {
-      before_conflicts(e, p, visitor);
+
+    before_conflicts(e, p, visitor);
       
-      bool could_lock_zone;
-      bool facet_not_in_its_cz = false;
-      zone = conflicts_zone(p, e, facet_not_in_its_cz, could_lock_zone);
+    bool could_lock_zone;
+    bool facet_not_in_its_cz = false;
+    zone = conflicts_zone(p, e, facet_not_in_its_cz, could_lock_zone);
       
-      if (!could_lock_zone)
-        result = COULD_NOT_LOCK_ZONE;
-      else if (facet_not_in_its_cz)
-        result = THE_FACET_TO_REFINE_IS_NOT_IN_ITS_CONFLICT_ZONE;
-      else
-        result = test_point_conflict(p, zone, visitor);
-    }
-    else
-    {
+    if (!could_lock_zone)
       result = COULD_NOT_LOCK_ZONE;
-    }
+    else if (facet_not_in_its_cz)
+      result = THE_FACET_TO_REFINE_IS_NOT_IN_ITS_CONFLICT_ZONE;
+    else
+      result = test_point_conflict(p, zone, visitor);
 
-//==== !Simple Grid locking
+//=========================================
+//==== NOT Simple Grid locking
+//=========================================
 #else
+    
+    before_conflicts(e, p, visitor);
 
-    // Concurrent?
+    //=========== Concurrent? =============
 #  ifdef CGAL_MESH_3_CONCURRENT_REFINEMENT
     bool could_lock_zone;
     bool facet_not_in_its_cz = false;
@@ -754,7 +951,7 @@ public:
     else
       result = test_point_conflict(p, zone, visitor);
 
-    // ... or not?
+    //=========== or not? =================
 #  else
     bool facet_not_in_its_cz = false;
     Zone zone = conflicts_zone(p, e, facet_not_in_its_cz);
@@ -766,6 +963,9 @@ public:
 #  endif
 
 #endif
+//=========================================
+//==== / Simple Grid locking
+//=========================================
       
 #ifdef CGAL_MESHES_DEBUG_REFINEMENT_POINTS
     std::cerr << "(" << p << ") ";
diff --git a/Mesh_3/benchmark/Mesh_3/concurrency.cpp b/Mesh_3/benchmark/Mesh_3/concurrency.cpp
index c9e8b1ae092..34a2ccce719 100644
--- a/Mesh_3/benchmark/Mesh_3/concurrency.cpp
+++ b/Mesh_3/benchmark/Mesh_3/concurrency.cpp
@@ -14,7 +14,7 @@
 # define CGAL_MESH_3_CONCURRENT_REFINEMENT
   // In case some code uses CGAL_PROFILE, it needs to be concurrent
 # define CGAL_CONCURRENT_PROFILE
-//# define CGAL_CONCURRENT_MESH_3_VERBOSE
+# define CGAL_CONCURRENT_MESH_3_VERBOSE
 
   // ==========================================================================
   // Locking strategy
@@ -24,10 +24,29 @@
 //#   define CGAL_MESH_3_LOCKING_STRATEGY_CELL_LOCK
 #   define CGAL_MESH_3_LOCKING_STRATEGY_SIMPLE_GRID_LOCKING
 //#   define CGAL_MESH_3_CONCURRENT_REFINEMENT_LOCK_ADJ_CELLS
+//#   define CGAL_MESH_3_DO_NOT_LOCK_INFINITE_VERTEX
+//#   define CGAL_MESH_3_ACTIVATE_GRID_INDEX_CACHE_IN_VERTEX
 
+//#   define CGAL_MESH_3_WORKSHARING_USES_TASKS
+//#     define CGAL_MESH_3_WORKSHARING_USES_PARALLEL_FOR
+#     define CGAL_MESH_3_WORKSHARING_USES_PARALLEL_DO
+
+#   ifdef CGAL_MESH_3_WORKSHARING_USES_TASKS
+    const int MESH_3_LOCKING_GRID_NUM_CELLS_PER_AXIS = 25;
+    const int MESH_3_FIRST_GRID_LOCK_RADIUS = 0;
+
+    const int MESH_3_WORK_STATS_GRID_NUM_CELLS_PER_AXIS = 2;
+    const int MESH_3_WORK_STATS_GRID_NUM_CELLS = 
+      MESH_3_WORK_STATS_GRID_NUM_CELLS_PER_AXIS*
+      MESH_3_WORK_STATS_GRID_NUM_CELLS_PER_AXIS*
+      MESH_3_WORK_STATS_GRID_NUM_CELLS_PER_AXIS;
+
+#   else
     const int MESH_3_LOCKING_GRID_NUM_CELLS_PER_AXIS = 30;
     const int MESH_3_FIRST_GRID_LOCK_RADIUS = 2;
     const int MESH_3_REFINEMENT_GRAINSIZE = 10;
+#   endif
+
 
 #   ifdef CGAL_MESH_3_LOCKING_STRATEGY_CELL_LOCK
 #     include <tbb/recursive_mutex.h>
@@ -46,7 +65,7 @@
   // Concurrency Parameters
   // ==========================================================================
 
-  const size_t ELEMENT_BATCH_SIZE = 10000;
+  const size_t ELEMENT_BATCH_SIZE = 100000;
 
   // ==========================================================================
   // Profiling
@@ -85,8 +104,20 @@ bool g_temp = false;
   Global_mutex_type g_global_mutex; // CJTODO: temporary
   
   // CJTODO TEMP: not thread-safe => move it to Mesher_3
+  
   // Elephant.off => BBox (x,y,z): [ -0.358688, 0.356308 ], [ -0.498433, 0.49535 ], [ -0.298931, 0.298456 ]
-  CGAL::Bbox_3 g_bbox(-0.35, 0.35, -0.5, 0.5, -0.3, 0.3);
+  //const char *INPUT_FILE_NAME = "D:/INRIA/CGAL/workingcopy/Mesh_3/examples/Mesh_3/data/elephant.off";
+  //CGAL::Bbox_3 g_bbox(-0.36, 0.36, -0.5, 0.5, -0.3, 0.3);
+  
+  // Fandisk.off => BBox (x,y,z): [ -0.4603, 0.4603 ], [ -0.254894, 0.25555 ], [ -0.499801, 0.499177 ], 
+  const char *INPUT_FILE_NAME = "D:/INRIA/CGAL/workingcopy/Mesh_3/examples/Mesh_3/data/fandisk.off";
+  CGAL::Bbox_3 g_bbox(-0.47, 0.47, -0.26, 0.26, -0.5, 0.5);
+  
+# ifdef CGAL_MESH_3_WORKSHARING_USES_TASKS
+#   include <CGAL/Mesh_3/Worksharing_data_structures.h> // CJODO TEMP?
+    CGAL::Mesh_3::Worksharing_ds_type g_worksharing_ds;
+# endif
+
 # ifdef CGAL_MESH_3_LOCKING_STRATEGY_SIMPLE_GRID_LOCKING
   CGAL::Mesh_3::Refinement_grid_type g_lock_grid(g_bbox, MESH_3_LOCKING_GRID_NUM_CELLS_PER_AXIS);
 
@@ -172,14 +203,24 @@ bool refine_mesh(const std::string &input_filename)
   // Create domain
   Mesh_domain domain(polyhedron);
 
+  // Very small elements
   Mesh_parameters params;
   params.facet_angle = 25;
+  params.facet_sizing = 0.001;
+  params.facet_approx = 0.0068;
+  params.tet_shape = 3;
+  params.tet_sizing = 0.001;
+  
+  // Middle-size elements
+  /*Mesh_parameters params;
+  params.facet_angle = 25;
   params.facet_sizing = 0.002;
   params.facet_approx = 0.0068;
-  /*params.tet_shape = 3;
-  params.tet_sizing = 1.;*/
-  
+  params.tet_shape = 3;
+  params.tet_sizing = 0.005;*/
+
   std::cerr 
+    << "File: " << input_filename << std::endl
     << "Parameters: " << std::endl 
     << params.log() << std::endl;
 
@@ -187,9 +228,9 @@ bool refine_mesh(const std::string &input_filename)
   Mesh_criteria criteria(
     facet_angle=params.facet_angle,
     facet_size=params.facet_sizing,
-    facet_distance=params.facet_approx/*,
+    facet_distance=params.facet_approx,
     cell_size=params.tet_sizing,
-    cell_radius_edge_ratio=params.tet_shape*/
+    cell_radius_edge_ratio=params.tet_shape
   );
 
   // Mesh generation
@@ -219,7 +260,7 @@ int main()
   for(int i = 1 ; ; ++i)
   {
     std::cerr << "Refinement #" << i << "..." << std::endl;
-    refine_mesh("D:/INRIA/CGAL/workingcopy/Mesh_3/examples/Mesh_3/data/elephant.off");
+    refine_mesh(INPUT_FILE_NAME);
     std::cerr << "Refinement #" << i << " done." << std::endl;
     std::cerr << std::endl << "---------------------------------" << std::endl << std::endl;
   }
diff --git a/Mesh_3/demo/Mesh_3/Mesh_3_plugin.cpp b/Mesh_3/demo/Mesh_3/Mesh_3_plugin.cpp
index 3d3d3b54099..60ac014b9d7 100644
--- a/Mesh_3/demo/Mesh_3/Mesh_3_plugin.cpp
+++ b/Mesh_3/demo/Mesh_3/Mesh_3_plugin.cpp
@@ -34,28 +34,38 @@
 bool g_temp = false;
 
 #ifdef CONCURRENT_MESH_3
-  #include <CGAL/Mesh_3/Locking_data_structures.h> // CJODO TEMP?
-  #include <CGAL/BBox_3.h>
+
+# include <CGAL/BBox_3.h>
+# include <CGAL/Mesh_3/Locking_data_structures.h> // CJODO TEMP?
 
   // CJTODO TEMP TEST
-#ifdef CGAL_MESH_3_DO_NOT_LOCK_INFINITE_VERTEX
+# ifdef CGAL_MESH_3_DO_NOT_LOCK_INFINITE_VERTEX
   bool g_is_set_cell_active = true;
-#endif
+# endif
 
   Global_mutex_type g_global_mutex; // CJTODO: temporary
 
-
-  // CJTODO TEMP: not thread-safe => move it to Mesher_3
   // Elephant.off => BBox (x,y,z): [ -0.358688, 0.356308 ], [ -0.498433, 0.49535 ], [ -0.298931, 0.298456 ]
-  CGAL::Bbox_3 g_bbox(-0.35, 0.35, -0.5, 0.5, -0.3, 0.3);
+  const char *INPUT_FILE_NAME = "D:/INRIA/CGAL/workingcopy/Mesh_3/examples/Mesh_3/data/elephant.off";
+  CGAL::Bbox_3 g_bbox(-0.36, 0.36, -0.5, 0.5, -0.3, 0.3);
+  
+  // Fandisk.off => BBox (x,y,z): [ -0.4603, 0.4603 ], [ -0.254894, 0.25555 ], [ -0.499801, 0.499177 ], 
+  //const char *INPUT_FILE_NAME = "D:/INRIA/CGAL/workingcopy/Mesh_3/examples/Mesh_3/data/fandisk.off";
+  //CGAL::Bbox_3 g_bbox(-0.47, 0.47, -0.26, 0.26, -0.5, 0.5);
+  
+# ifdef CGAL_MESH_3_WORKSHARING_USES_TASKS
+#   include <CGAL/Mesh_3/Worksharing_data_structures.h> // CJODO TEMP?
+    CGAL::Mesh_3::Worksharing_ds_type g_worksharing_ds;
+# endif
+
 # ifdef CGAL_MESH_3_LOCKING_STRATEGY_SIMPLE_GRID_LOCKING
-  CGAL::Mesh_3::Refinement_grid_type g_lock_grid(g_bbox, MESH_3_LOCKING_GRID_NUM_CELLS_PER_AXIS);
+    CGAL::Mesh_3::Refinement_grid_type g_lock_grid(g_bbox, MESH_3_LOCKING_GRID_NUM_CELLS_PER_AXIS);
 
 # elif defined(CGAL_MESH_3_LOCKING_STRATEGY_CELL_LOCK)
-# include <utility>
-# include <vector>
-# include <tbb/enumerable_thread_specific.h>
-  tbb::enumerable_thread_specific<std::vector<std::pair<void*, unsigned int> > > g_tls_locked_cells;
+#   include <utility>
+#   include <vector>
+#   include <tbb/enumerable_thread_specific.h>
+    tbb::enumerable_thread_specific<std::vector<std::pair<void*, unsigned int> > > g_tls_locked_cells;
 # endif
 
 #endif
diff --git a/Mesh_3/demo/Mesh_3/Mesh_function.h b/Mesh_3/demo/Mesh_3/Mesh_function.h
index 376cfa62c6b..7178b13e6da 100644
--- a/Mesh_3/demo/Mesh_3/Mesh_function.h
+++ b/Mesh_3/demo/Mesh_3/Mesh_function.h
@@ -37,6 +37,11 @@
 #include "C3t3_type.h"
 #include "Meshing_thread.h"
 
+// CJTODO TEMP: not thread-safe => move it to Mesher_3
+#include <CGAL/Mesh_3/Locking_data_structures.h> // CJODO TEMP?
+#ifdef CGAL_MESH_3_LOCKING_STRATEGY_SIMPLE_GRID_LOCKING
+  extern CGAL::Mesh_3::Refinement_grid_type g_lock_grid;
+#endif
 
 struct Mesh_parameters
 {
@@ -158,6 +163,9 @@ launch()
        ++it )
   {
     Vertex_handle v = c3t3_.triangulation().insert(it->first);
+# ifdef CGAL_MESH_3_LOCKING_STRATEGY_SIMPLE_GRID_LOCKING
+    g_lock_grid.unlock_all_tls_locked_cells();
+#endif
     c3t3_.set_dimension(v,2); // by construction, points are on surface
     c3t3_.set_index(v,it->second);
   }
diff --git a/Mesh_3/demo/Mesh_3/Scene_c3t3_item.cpp b/Mesh_3/demo/Mesh_3/Scene_c3t3_item.cpp
index 1ab13c86ea6..723d7d74ab4 100644
--- a/Mesh_3/demo/Mesh_3/Scene_c3t3_item.cpp
+++ b/Mesh_3/demo/Mesh_3/Scene_c3t3_item.cpp
@@ -20,19 +20,30 @@
 #include <QGLViewer/qglviewer.h>
 
 #ifdef CONCURRENT_MESH_3
-  #include <CGAL/Mesh_3/Locking_data_structures.h> // CJODO TEMP?
-  #include <CGAL/BBox_3.h>
+# include <CGAL/Mesh_3/Locking_data_structures.h> // CJODO TEMP?
+
+# include <CGAL/BBox_3.h>
 
   // CJTODO TEMP TEST
-#ifdef CGAL_MESH_3_DO_NOT_LOCK_INFINITE_VERTEX
+# ifdef CGAL_MESH_3_DO_NOT_LOCK_INFINITE_VERTEX
   bool g_is_set_cell_active = true;
-#endif
+# endif
 
   //Global_mutex_type g_global_mutex; // CJTODO: temporary
-
-  // CJTODO TEMP: not thread-safe => move it to Mesher_3
+  
   // Elephant.off => BBox (x,y,z): [ -0.358688, 0.356308 ], [ -0.498433, 0.49535 ], [ -0.298931, 0.298456 ]
-  CGAL::Bbox_3 g_bbox(-0.35, 0.35, -0.5, 0.5, -0.3, 0.3);
+  const char *INPUT_FILE_NAME = "D:/INRIA/CGAL/workingcopy/Mesh_3/examples/Mesh_3/data/elephant.off";
+  CGAL::Bbox_3 g_bbox(-0.36, 0.36, -0.5, 0.5, -0.3, 0.3);
+  
+  // Fandisk.off => BBox (x,y,z): [ -0.4603, 0.4603 ], [ -0.254894, 0.25555 ], [ -0.499801, 0.499177 ], 
+  //const char *INPUT_FILE_NAME = "D:/INRIA/CGAL/workingcopy/Mesh_3/examples/Mesh_3/data/fandisk.off";
+  //CGAL::Bbox_3 g_bbox(-0.47, 0.47, -0.26, 0.26, -0.5, 0.5);
+  
+# ifdef CGAL_MESH_3_WORKSHARING_USES_TASKS
+#   include <CGAL/Mesh_3/Worksharing_data_structures.h> // CJODO TEMP?
+    CGAL::Mesh_3::Worksharing_ds_type g_worksharing_ds;
+# endif
+
 # ifdef CGAL_MESH_3_LOCKING_STRATEGY_SIMPLE_GRID_LOCKING
   CGAL::Mesh_3::Refinement_grid_type g_lock_grid(g_bbox, MESH_3_LOCKING_GRID_NUM_CELLS_PER_AXIS);
 
diff --git a/Mesh_3/demo/Mesh_3/config.h b/Mesh_3/demo/Mesh_3/config.h
index 644d58c550a..e8207b758d7 100644
--- a/Mesh_3/demo/Mesh_3/config.h
+++ b/Mesh_3/demo/Mesh_3/config.h
@@ -50,11 +50,29 @@
 #   define CGAL_MESH_3_LOCKING_STRATEGY_SIMPLE_GRID_LOCKING
 //#   define CGAL_MESH_3_CONCURRENT_REFINEMENT_LOCK_ADJ_CELLS
 //#   define CGAL_MESH_3_DO_NOT_LOCK_INFINITE_VERTEX
-   
-    const int MESH_3_LOCKING_GRID_NUM_CELLS_PER_AXIS = 30;
-    const int MESH_3_FIRST_GRID_LOCK_RADIUS = 2;
-    const int MESH_3_REFINEMENT_GRAINSIZE = 10;
+//#   define CGAL_MESH_3_ACTIVATE_GRID_INDEX_CACHE_IN_VERTEX
 
+#   define CGAL_MESH_3_WORKSHARING_USES_TASKS
+//#   define CGAL_MESH_3_WORKSHARING_USES_PARALLEL_FOR
+//#   define CGAL_MESH_3_WORKSHARING_USES_PARALLEL_DO
+
+#   ifdef CGAL_MESH_3_WORKSHARING_USES_TASKS
+      const int MESH_3_LOCKING_GRID_NUM_CELLS_PER_AXIS = 25;
+      const int MESH_3_FIRST_GRID_LOCK_RADIUS = 0;
+
+      const int MESH_3_WORK_STATS_GRID_NUM_CELLS_PER_AXIS = 2;
+      const int MESH_3_WORK_STATS_GRID_NUM_CELLS = 
+        MESH_3_WORK_STATS_GRID_NUM_CELLS_PER_AXIS*
+        MESH_3_WORK_STATS_GRID_NUM_CELLS_PER_AXIS*
+        MESH_3_WORK_STATS_GRID_NUM_CELLS_PER_AXIS;
+
+#   else
+      const int MESH_3_LOCKING_GRID_NUM_CELLS_PER_AXIS = 30;
+      const int MESH_3_FIRST_GRID_LOCK_RADIUS = 2;
+      const int MESH_3_REFINEMENT_GRAINSIZE = 10;
+#   endif
+
+    
 #   ifdef CGAL_MESH_3_LOCKING_STRATEGY_CELL_LOCK
 #     include <tbb/recursive_mutex.h>
       typedef tbb::recursive_mutex Cell_mutex_type; // CJTODO try others
@@ -72,7 +90,7 @@
   // Concurrency Parameters
   // ==========================================================================
 
-  const size_t ELEMENT_BATCH_SIZE = 30000;
+  const size_t ELEMENT_BATCH_SIZE = 100000;
 
   // ==========================================================================
   // Profiling
diff --git a/Mesh_3/include/CGAL/Mesh_3/Locking_data_structures.h b/Mesh_3/include/CGAL/Mesh_3/Locking_data_structures.h
index f85d10042c7..5a923a792ea 100644
--- a/Mesh_3/include/CGAL/Mesh_3/Locking_data_structures.h
+++ b/Mesh_3/include/CGAL/Mesh_3/Locking_data_structures.h
@@ -719,9 +719,9 @@ protected:
   TLS_locked_cells                                m_tls_locked_cells;
 };
 
-typedef Simple_grid_locking_ds Refinement_grid_type;
+//typedef Simple_grid_locking_ds Refinement_grid_type;
 //typedef Simple_grid_locking_ds_with_mutex Refinement_grid_type;
-//typedef Simple_grid_locking_ds_with_thread_ids Refinement_grid_type;
+typedef Simple_grid_locking_ds_with_thread_ids Refinement_grid_type;
 
 
 } //namespace Mesh_3
diff --git a/Mesh_3/include/CGAL/Mesh_3/Worksharing_data_structures.h b/Mesh_3/include/CGAL/Mesh_3/Worksharing_data_structures.h
new file mode 100644
index 00000000000..61b5497765c
--- /dev/null
+++ b/Mesh_3/include/CGAL/Mesh_3/Worksharing_data_structures.h
@@ -0,0 +1,322 @@
+// Copyright (c) 2012  INRIA Sophia-Antipolis (France).
+// All rights reserved.
+//
+// This file is part of CGAL (www.cgal.org).
+// You can redistribute it and/or modify it under the terms of the GNU
+// General Public License as published by the Free Software Foundation,
+// either version 3 of the License, or (at your option) any later version.
+//
+// Licensees holding a valid commercial license may use this file in
+// accordance with the commercial license agreement provided with the software.
+//
+// This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
+// WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// $URL: $
+// $Id: $
+//
+// Author(s)     : Clement Jamin
+
+#ifdef CONCURRENT_MESH_3
+
+#ifndef CGAL_MESH_3_WORKSHARING_DATA_STRUCTURES_H
+#define CGAL_MESH_3_WORKSHARING_DATA_STRUCTURES_H
+
+#include <CGAL/Bbox_3.h>
+
+#include <tbb/concurrent_queue.h>
+#include <tbb/task.h>
+
+// CJTODO TEMP: not thread-safe => move it to Mesher_3
+extern CGAL::Bbox_3 g_bbox;
+
+namespace CGAL {
+namespace Mesh_3 {
+
+// Forward declarations
+class Dynamic_load_based_worksharing_ds;
+// Typedef
+typedef Dynamic_load_based_worksharing_ds Worksharing_ds_type;
+
+// Grid of per-cell occupation counters laid over a bounding box. It also
+// records which cell is currently considered the "laziest" one.
+class Work_statistics
+{
+public:
+  // Allocates num_grid_cells_per_axis^3 counters, all set to 0, and keeps
+  // the bbox minima and the number of cells per unit length on each axis.
+  Work_statistics(const Bbox_3 &bbox, 
+                  int num_grid_cells_per_axis)
+    : m_num_grid_cells_per_axis(num_grid_cells_per_axis)
+  {
+    m_laziest_cell_index = 0;
+    m_laziest_cell_occupation = 1000;
+
+    int num_cells =
+      num_grid_cells_per_axis*num_grid_cells_per_axis*num_grid_cells_per_axis;
+    m_occupation_grid = new tbb::atomic<int>[num_cells];
+    // Initialize grid
+    for (int i = 0 ; i < num_cells ; ++i)
+      m_occupation_grid[i] = 0;
+
+    // Keep mins and resolutions (assumes max > min on every axis of bbox)
+    m_xmin = bbox.xmin();
+    m_ymin = bbox.ymin();
+    m_zmin = bbox.zmin();
+    double n = static_cast<double>(num_grid_cells_per_axis);
+    m_resolution_x = n / (bbox.xmax() - m_xmin);
+    m_resolution_y = n / (bbox.ymax() - m_ymin);
+    m_resolution_z = n / (bbox.zmax() - m_zmin);
+  }
+
+  /// Destructor
+  ~Work_statistics()
+  {
+    delete [] m_occupation_grid;
+  }
+
+  // Adds to_add to the counter of cell cell_index, then updates the record
+  // of the laziest cell from the new value and num_items_in_work_queue.
+  void add_occupation(int cell_index, int to_add, int num_items_in_work_queue)
+  {
+    int new_occupation = 
+      m_occupation_grid[cell_index].fetch_and_add(to_add) + to_add;
+
+    // If this cell is the current most lazy, update the value
+    if (cell_index == m_laziest_cell_index)
+    {
+      if (num_items_in_work_queue == 0)
+        // So that it won't stay long the laziest
+        m_laziest_cell_occupation = 999999;
+      else
+        m_laziest_cell_occupation = new_occupation;
+    }
+    else if (num_items_in_work_queue > 0 
+      && new_occupation <= m_laziest_cell_occupation)
+    {
+      m_laziest_cell_index = cell_index;
+      m_laziest_cell_occupation = new_occupation;
+    }
+  }
+  
+  // Same as above, with the cell given by its (x, y, z) grid coordinates.
+  void add_occupation(int index_x, int index_y, int index_z, 
+                      int to_add, int num_items_in_work_queue)
+  {
+    int index = 
+      index_z*m_num_grid_cells_per_axis*m_num_grid_cells_per_axis
+      + index_y*m_num_grid_cells_per_axis + index_x;
+    return add_occupation(index, to_add, num_items_in_work_queue);
+  }
+  
+  /// Linear index of the grid cell containing point (clamped to the grid).
+  /// P3 must provide .x(), .y(), .z()
+  template <typename P3>
+  int compute_index(const P3 &point) const
+  {
+    int index_x = static_cast<int>( (to_double(point.x()) - m_xmin) * m_resolution_x);
+    index_x = std::max( 0, std::min(index_x, m_num_grid_cells_per_axis - 1) );
+    int index_y = static_cast<int>( (to_double(point.y()) - m_ymin) * m_resolution_y);
+    index_y = std::max( 0, std::min(index_y, m_num_grid_cells_per_axis - 1) );
+    int index_z = static_cast<int>( (to_double(point.z()) - m_zmin) * m_resolution_z);
+    index_z = std::max( 0, std::min(index_z, m_num_grid_cells_per_axis - 1) );
+
+    return index_z*m_num_grid_cells_per_axis*m_num_grid_cells_per_axis
+      + index_y*m_num_grid_cells_per_axis + index_x;
+  }
+
+  /// P3 must provide .x(), .y(), .z()
+  // Adds to_add to the cell containing point; returns that cell's index
+  template <typename P3>
+  int add_occupation(const P3 &point, int to_add, int num_items_in_work_queue)
+  {
+    int index = compute_index(point);
+    add_occupation(index, to_add, num_items_in_work_queue);
+    return index;
+  }
+
+  // Index of the cell currently recorded as the laziest one.
+  int get_laziest_cell_index() const
+  {
+    return m_laziest_cell_index;
+  }
+  
+protected:
+  int                                             m_num_grid_cells_per_axis;
+  double                                          m_xmin;
+  double                                          m_ymin;
+  double                                          m_zmin;
+  double                                          m_resolution_x;
+  double                                          m_resolution_y;
+  double                                          m_resolution_z;
+  tbb::atomic<int> *                              m_occupation_grid;
+  tbb::atomic<int>                                m_laziest_cell_index;
+  tbb::atomic<int>                                m_laziest_cell_occupation;
+private: // not copyable: m_occupation_grid is an owning raw pointer
+  Work_statistics(const Work_statistics &);            // not implemented
+  Work_statistics &operator=(const Work_statistics &); // not implemented
+};
+
+
+/* 
+ * ==============
+ * class WorkItem
+ * Abstract base class for a piece of work, handled through WorkItem*.
+ * ==============
+ */
+class WorkItem 
+{
+public:
+  virtual ~WorkItem() {} // polymorphic base: safe destruction via WorkItem*
+  // Executes the work; the derived class defines the actual work.
+  virtual void run() = 0;
+  virtual void set_index(int) = 0;   // stores an integer index for this item
+  virtual int get_index() const = 0; // returns the index stored by set_index
+};
+// WorkItem that stores a copy of a functor and calls it once in run().
+template<typename Func>
+class ConcreteWorkItem
+  : public WorkItem
+{
+public:
+  explicit ConcreteWorkItem(const Func& func) // stores a copy of func
+    : m_func(func), m_index(-1)
+  {}
+  
+  void run() // calls the stored functor, then destroys this item
+  {
+    m_func();
+    delete this; // needs heap allocation; do not use the item afterwards
+  }
+  
+  void set_index(int index)
+  {
+    m_index = index;
+  }
+
+  int get_index() const // -1 as long as set_index() has not been called
+  {
+    return m_index;
+  }
+
+private:
+  Func  m_func;  // copy of the callable passed to the constructor
+  int   m_index; // initialised to -1, overwritten by set_index()
+};
+
+
+
+/* 
+ * =================
+ * class RunWorkItem
+ * tbb::task whose execute() runs one work item from g_worksharing_ds.
+ */
+class RunWorkItem
+  : public tbb::task 
+{
+public:
+  RunWorkItem() {}
+
+private:
+  /*override*/inline tbb::task* execute(); // defined below, after g_worksharing_ds is declared
+};
+
+
+
+/* 
+ * =======================================
+ * class Dynamic_load_based_worksharing_ds
+ * Keeps one queue of work items per cell of the work-statistics grid.
+ * =======================================
+ */
+class Dynamic_load_based_worksharing_ds
+{
+public:
+  // Builds the statistics grid over g_bbox and zeroes the item counters.
+  Dynamic_load_based_worksharing_ds()
+    : m_stats(g_bbox, MESH_3_WORK_STATS_GRID_NUM_CELLS_PER_AXIS)
+  {
+    for (int i = 0 ; i < MESH_3_WORK_STATS_GRID_NUM_CELLS ; ++i)
+      m_num_items[i] = 0;
+  }
+
+  // Queues p_item in the cell containing point, then enqueues a RunWorkItem
+  // child of parent_task. P3 must provide .x(), .y(), .z().
+  template <typename P3>
+  void add(WorkItem * p_item, const P3 &point, tbb::task &parent_task)
+  {
+    int index = m_stats.compute_index(point);
+    p_item->set_index(index);
+    m_work_items[index].push(p_item);
+    ++m_num_items[index];
+    // CJTODO: try "spawn" instead of enqueue
+    tbb::task::enqueue(*new(parent_task.allocate_child()) RunWorkItem);
+  }
+
+  // Pops one item, preferably from the laziest cell's queue, and runs it.
+  // Spins until an item is available: call it exactly once per add(),
+  // as RunWorkItem::execute() does.
+  void run_next_work_item()
+  {
+    WorkItem *p_item = 0;
+    int index = m_stats.get_laziest_cell_index();
+    // If that queue is empty, look for a non-empty one. Other threads push
+    // and pop concurrently, so the item left for us may sit in a queue that
+    // was already visited: scan cyclically, since stopping at the last
+    // queue could otherwise index past the end of the arrays.
+    if (!m_work_items[index].try_pop(p_item))
+    {
+      index = 0;
+      while (!m_work_items[index].try_pop(p_item))
+        index = (index + 1) % MESH_3_WORK_STATS_GRID_NUM_CELLS;
+    }
+    --m_num_items[index];
+    CGAL_assertion(p_item != 0);
+    // The cell counts one more running item for the duration of run().
+    m_stats.add_occupation(index, 1, m_num_items[index]);
+    p_item->run();
+    m_stats.add_occupation(index, -1, m_num_items[index]);
+  }
+
+protected:
+  // One queue and one item counter per cell of the statistics grid.
+  Work_statistics                   m_stats; 
+  tbb::concurrent_queue<WorkItem*>  m_work_items[MESH_3_WORK_STATS_GRID_NUM_CELLS];
+  tbb::atomic<int>                  m_num_items [MESH_3_WORK_STATS_GRID_NUM_CELLS];
+};
+
+
+} //namespace Mesh_3
+} //namespace CGAL
+
+extern CGAL::Mesh_3::Worksharing_ds_type g_worksharing_ds;
+
+namespace CGAL
+{
+namespace Mesh_3
+{
+
+inline tbb::task* RunWorkItem::execute() // runs one queued work item
+{
+  g_worksharing_ds.run_next_work_item();
+  return NULL; // NULL: no next task is returned to the scheduler
+}
+
+/* 
+ * function enqueue_work
+ * Wraps f in a heap-allocated ConcreteWorkItem and hands it to the global
+ * g_worksharing_ds, filed under the grid cell that contains point.
+ * P3 must provide .x(), .y(), .z().
+ */
+template<typename Func, typename P3>
+void enqueue_work(Func f, tbb::task &parent_task, const P3 &point)
+{
+  WorkItem *p_item = new ConcreteWorkItem<Func>(f);
+  g_worksharing_ds.add(p_item, point, parent_task);
+}
+
+} //namespace Mesh_3
+} //namespace CGAL
+
+#endif // CGAL_MESH_3_WORKSHARING_DATA_STRUCTURES_H
+#endif // CONCURRENT_MESH_3
diff --git a/STL_Extension/include/CGAL/Concurrent_compact_container.h b/STL_Extension/include/CGAL/Concurrent_compact_container.h
index 58745cb7bbf..0c9358d07ee 100644
--- a/STL_Extension/include/CGAL/Concurrent_compact_container.h
+++ b/STL_Extension/include/CGAL/Concurrent_compact_container.h
@@ -562,7 +562,8 @@ private:
   }
   
   typedef tbb::enumerable_thread_specific<FreeList> Free_lists;
-  typedef tbb::spin_mutex                           Mutex; // CJTODO: try others
+  typedef tbb::queuing_mutex                        Mutex; // CJTODO: try others
+  //typedef tbb::spin_mutex                           Mutex; // CJTODO: try others
 
   // We store a vector of pointers to all allocated blocks and their sizes.
   // Knowing all pointers, we don't have to walk to the end of a block to reach
diff --git a/Triangulation_3/include/CGAL/Triangulation_3.h b/Triangulation_3/include/CGAL/Triangulation_3.h
index 2b8eccfb2b1..5f37e466cd1 100644
--- a/Triangulation_3/include/CGAL/Triangulation_3.h
+++ b/Triangulation_3/include/CGAL/Triangulation_3.h
@@ -523,6 +523,31 @@ public:
 
 #ifdef CGAL_MESH_3_CONCURRENT_REFINEMENT
   
+  // Tries to lock the lock-grid area of vertex `vh`; returns true on success.
+  bool try_lock_vertex(Vertex_handle vh, int lock_radius = 0) const
+  {
+#ifdef CGAL_MESH_3_ACTIVATE_GRID_INDEX_CACHE_IN_VERTEX
+    const int cached_index = vh->get_grid_index_cache();
+    if (cached_index < 0)
+    {
+      // Nothing cached yet: lock through the vertex position, then store the
+      // second member of the result as the cached index for later attempts.
+      const std::pair<bool, int> lock_result =
+        g_lock_grid.try_lock(vh->point(), lock_radius);
+      vh->set_grid_index_cache(lock_result.second);
+      return lock_result.first;
+    }
+
+    // Lock through the cached index; succeed only if the cache is unchanged.
+    // NOTE(review): on a mismatch the lock just taken is not released here.
+    if (!g_lock_grid.try_lock(cached_index, lock_radius))
+      return false;
+    return vh->get_grid_index_cache() == cached_index;
+#else
+    return g_lock_grid.try_lock(vh->point(), lock_radius).first;
+#endif
+  }
+
   bool try_lock_element(Cell_handle cell_handle, int lock_radius = 0) const
   {
     bool success = true;
@@ -537,10 +562,10 @@ public:
       // We do not lock the infinite vertex
       if (!is_infinite(vh))
       {
-        success = g_lock_grid.try_lock(vh->point(), lock_radius).first;
+        success = try_lock_vertex(vh, lock_radius);
       }
 #   else
-      success = g_lock_grid.try_lock(vh->point(), lock_radius).first;
+      success = try_lock_vertex(vh, lock_radius);
 #   endif
     }
 # elif defined(CGAL_MESH_3_LOCKING_STRATEGY_CELL_LOCK)
@@ -556,10 +581,11 @@ public:
 # ifdef CGAL_MESH_3_LOCKING_STRATEGY_SIMPLE_GRID_LOCKING
     // Lock the element area on the grid
     Cell_handle cell = facet.first;
-    for (int iVertex = (facet.second+1)&3 ; success && iVertex != facet.second ; iVertex = (iVertex+1)&3)
+    for (int iVertex = (facet.second+1)&3 ; 
+         success && iVertex != facet.second ; iVertex = (iVertex+1)&3)
     {
       Vertex_handle vh = cell->vertex(iVertex);
-      success = g_lock_grid.try_lock(vh->point(), lock_radius).first;
+      success = try_lock_vertex(vh, lock_radius);
     }
 # elif defined(CGAL_MESH_3_LOCKING_STRATEGY_CELL_LOCK)
     success = facet.first->try_lock(); // CJTODO: we lock the cell => stupid?
diff --git a/Triangulation_3/include/CGAL/Triangulation_ds_vertex_base_3.h b/Triangulation_3/include/CGAL/Triangulation_ds_vertex_base_3.h
index 4a3b452fc26..ca58888ee53 100644
--- a/Triangulation_3/include/CGAL/Triangulation_ds_vertex_base_3.h
+++ b/Triangulation_3/include/CGAL/Triangulation_ds_vertex_base_3.h
@@ -32,6 +32,10 @@
 # endif
 #endif
 
+#ifdef CGAL_MESH_3_ACTIVATE_GRID_INDEX_CACHE_IN_VERTEX
+# include <tbb/atomic.h>
+#endif
+
 namespace CGAL {
 
 template < typename TDS = void >
@@ -46,13 +50,16 @@ public:
   struct Rebind_TDS { typedef Triangulation_ds_vertex_base_3<TDS2> Other; };
 
   
+  Triangulation_ds_vertex_base_3()
+    : _c()
 #ifdef CGAL_MESH_3_DO_NOT_LOCK_INFINITE_VERTEX
-  Triangulation_ds_vertex_base_3()
-    : _c(), m_visited(false) {}
-#else
-  Triangulation_ds_vertex_base_3()
-    : _c() {}
+    , m_visited(false) 
 #endif
+  { // NOTE(review): the Cell_handle constructor does not set m_grid_index_cache
+#ifdef CGAL_MESH_3_ACTIVATE_GRID_INDEX_CACHE_IN_VERTEX
+    m_grid_index_cache = -1; // negative value: no grid index cached yet
+#endif
+  }
 
   Triangulation_ds_vertex_base_3(Cell_handle c)
     : _c(c) {}
@@ -90,6 +97,17 @@ public:
   {
     return cell() != Cell_handle();
   }
+  
+#ifdef CGAL_MESH_3_ACTIVATE_GRID_INDEX_CACHE_IN_VERTEX
+  // Stores `index` as the cached grid index of this vertex.
+  void set_grid_index_cache (int index)
+  {
+    m_grid_index_cache = index;
+  }
+  // Returns the cached grid index (the default constructor sets it to -1).
+  // Declared const: it only reads the member, so const vertices can use it.
+  int get_grid_index_cache() const { return m_grid_index_cache; }
+#endif
 
   // For use by the Compact_container.
   void *   for_compact_container() const
@@ -101,9 +119,12 @@ private:
 
 #ifdef CGAL_MESH_3_DO_NOT_LOCK_INFINITE_VERTEX
   mutable tbb::spin_mutex m_mutex;
+#endif
+#ifdef CGAL_MESH_3_ACTIVATE_GRID_INDEX_CACHE_IN_VERTEX
+  tbb::atomic<int> m_grid_index_cache;
 #endif
   Cell_handle _c;
-  
+
 #ifdef CGAL_MESH_3_DO_NOT_LOCK_INFINITE_VERTEX
 public:
   bool m_visited; // CJTODO TEMP TEST