diff --git a/Mesh_2/include/CGAL/Mesher_level.h b/Mesh_2/include/CGAL/Mesher_level.h
index be9becf2cec..d5582541150 100644
--- a/Mesh_2/include/CGAL/Mesher_level.h
+++ b/Mesh_2/include/CGAL/Mesher_level.h
@@ -34,7 +34,7 @@
 # include <CGAL/spatial_sort.h> //CJTODO: remove?
 # include <CGAL/Mesh_3/Locking_data_structures.h> // CJODO TEMP?
 # include <CGAL/Mesh_3/Worksharing_data_structures.h>
-# ifdef CGAL_MESH_3_WORKSHARING_USES_TASKS
+# ifdef CGAL_MESH_3_WORKSHARING_USES_TASK_SCHEDULER
 #   include <tbb/task.h>
 # endif
 # include <CGAL/BBox_3.h>
@@ -193,7 +193,7 @@ private:
   Mesh_3::WorksharingDataStructureType *m_worksharing_ds;
 #endif
 
-#ifdef CGAL_MESH_3_WORKSHARING_USES_TASKS
+#ifdef CGAL_MESH_3_WORKSHARING_USES_TASK_SCHEDULER
   tbb::task *m_empty_root_task;
 #endif
 
@@ -219,14 +219,14 @@ public:
     : previous_level(previous)
 #ifdef CGAL_MESH_3_CONCURRENT_REFINEMENT
     , FIRST_GRID_LOCK_RADIUS(
-        Concurrent_mesher_config::get_option<int>("first_grid_lock_radius"))
+        Concurrent_mesher_config::get().first_grid_lock_radius)
     , MESH_3_REFINEMENT_GRAINSIZE(
-        Concurrent_mesher_config::get_option<int>("first_grid_lock_radius"))
+        Concurrent_mesher_config::get().refinement_grainsize)
     , REFINEMENT_BATCH_SIZE(
-        Concurrent_mesher_config::get_option<int>("refinement_batch_size"))
+        Concurrent_mesher_config::get().refinement_batch_size)
     , m_lock_ds(p_lock_ds)
     , m_worksharing_ds(p_worksharing_ds)
-# ifdef CGAL_MESH_3_WORKSHARING_USES_TASKS
+# ifdef CGAL_MESH_3_WORKSHARING_USES_TASK_SCHEDULER
     , m_empty_root_task(0)
 # endif
 #endif
@@ -602,13 +602,15 @@ public:
     }
   }
 
-  template <typename Container_element, typename Mesh_visitor>
-  void enqueue_task(const Container_element &ce, Mesh_visitor visitor)
+  template <typename Container_element, typename Quality, typename Mesh_visitor>
+  void enqueue_task(
+    const Container_element &ce, const Quality &quality, Mesh_visitor visitor)
   {
+    typedef typename Derived::Container::value_type Container_quality_and_element;
     CGAL_assertion(m_empty_root_task != 0);
 
     m_worksharing_ds->enqueue_work(
-      [&, ce, visitor]()
+      [&, ce, quality, visitor]()
       {
         Mesher_level_conflict_status status;
         do
@@ -625,16 +627,17 @@ public:
 
         // We can now reconsider the element if requested
         if (status == CONFLICT_BUT_ELEMENT_CAN_BE_RECONSIDERED)
-          enqueue_task(ce, visitor);
+          enqueue_task(ce, quality, visitor);
 
         // Finally we add the new local bad_elements to the feeder
         while (no_longer_local_element_to_refine() == false)
         {
-          Container_element elt = derived().get_next_local_raw_element_impl().second;
+          Container_quality_and_element qe = derived().get_next_local_raw_element_impl();
           pop_next_local_element();
-          enqueue_task(elt, visitor);
+          enqueue_task(qe.second, qe.first, visitor);
         } 
       },
+      quality,
       *m_empty_root_task,
       circumcenter(derived().extract_element_from_container_value(ce)));
   }
@@ -646,6 +649,7 @@ public:
   template <class Mesh_visitor>
   void process_a_batch_of_elements(Mesh_visitor visitor)
   {
+    typedef typename Derived::Container::value_type Container_quality_and_element;
     typedef typename Derived::Container::Element Container_element;
     typedef typename Derived::Container::Quality Container_quality;
 
@@ -867,20 +871,10 @@ public:
     std::cerr << " done." << std::endl;
 #   endif
   //=======================================================
-  //================= TASKS?
+  //================= TASK-SCHEDULER?
   //=======================================================
 
-# elif defined(CGAL_MESH_3_WORKSHARING_USES_TASKS)
-
-    std::vector<Container_element> container_elements;
-    container_elements.reserve(REFINEMENT_BATCH_SIZE);
-    
-    while (!no_longer_element_to_refine())
-    {
-      Container_element ce = derived().get_next_raw_element_impl().second;
-      pop_next_element();
-      container_elements.push_back(ce);
-    }
+# elif defined(CGAL_MESH_3_WORKSHARING_USES_TASK_SCHEDULER)
     
 #   ifdef CGAL_CONCURRENT_MESH_3_VERBOSE
     std::cerr << "Refining elements...";
@@ -893,14 +887,13 @@ public:
     m_empty_root_task = new( tbb::task::allocate_root() ) tbb::empty_task;
     m_empty_root_task->set_ref_count(1);
 
-    std::vector<Container_element>::const_iterator it = 
-      container_elements.begin();
-    std::vector<Container_element>::const_iterator it_end = 
-      container_elements.end();
-    for( ; it != it_end ; ++it)
+    while (!no_longer_element_to_refine())
     {
-      enqueue_task(*it, visitor);
+      Container_quality_and_element qe = derived().get_next_raw_element_impl();
+      pop_next_element();
+      enqueue_task(qe.second, qe.first, visitor);
     }
+    
     m_empty_root_task->wait_for_all();
 
     std::cerr << " Flushing";
diff --git a/Mesh_3/demo/Mesh_3/Mesh_function.h b/Mesh_3/demo/Mesh_3/Mesh_function.h
index 9c0007a94cf..e78aa8790ab 100644
--- a/Mesh_3/demo/Mesh_3/Mesh_function.h
+++ b/Mesh_3/demo/Mesh_3/Mesh_function.h
@@ -170,9 +170,6 @@ launch()
                          Cell_criteria(p_.tet_shape,
                                        p_.tet_sizing));
   
-  // So that the config file is reloaded before each refinement
-  //Concurrent_mesher_config::unload_config_file();
-
   // Build mesher and launch refinement process
   mesher_ = new Mesher(c3t3_, *domain_, criteria);
   mesher_->initialize();
diff --git a/Mesh_3/include/CGAL/Mesh_3/Concurrent_mesher_config.h b/Mesh_3/include/CGAL/Mesh_3/Concurrent_mesher_config.h
index ec5006b6aae..6bfface5775 100644
--- a/Mesh_3/include/CGAL/Mesh_3/Concurrent_mesher_config.h
+++ b/Mesh_3/include/CGAL/Mesh_3/Concurrent_mesher_config.h
@@ -37,8 +37,9 @@ namespace po = boost::program_options;
 class Concurrent_mesher_config
 {
   // Private constructor (singleton)
-  Concurrent_mesher_config()
-  : m_loaded(false) {}
+  Concurrent_mesher_config() 
+    : m_config_file_loaded(false)
+  {}
 
 public:
   static Concurrent_mesher_config &get()
@@ -47,26 +48,39 @@ public:
     return singleton;
   }
 
-  static bool load_config_file(const char *filename)
+  static bool load_config_file(const char *filename = CONFIG_FILENAME, 
+    bool reload_if_already_loaded = false)
   {
-    return get().load_file(filename);
+    return get().load_file(filename, reload_if_already_loaded);
   }
+  
 
-  static void unload_config_file()
-  {
-    get().unload_file();
-  }
+  //=============== PUBLIC PARAMETERS ==============
 
-  template <typename OptionType>
-  static OptionType get_option(const char *option_name)
-  {
-    return get().get_option_value<OptionType>(option_name);
-  }
+  // From config file
+  int     locking_grid_num_cells_per_axis;
+  int     first_grid_lock_radius;
+  int     work_stats_grid_num_cells_per_axis;
+  int     num_work_items_per_batch;
+  int     refinement_grainsize;
+  int     refinement_batch_size;
+  float   num_vertices_of_coarse_mesh_per_core;
+  float   num_pseudo_infinite_vertices_per_core;
+
+  // Others
+
+
+  //================================================
 
 protected:
   
-  bool load_file(const char *filename)
+  bool load_file(
+    const char *filename = CONFIG_FILENAME, 
+    bool reload_if_already_loaded = false)
   {
+    if (m_config_file_loaded && reload_if_already_loaded == false)
+      return true;
+
     try
     {
       // Declare the supported options.
@@ -82,36 +96,47 @@ protected:
         ("num_pseudo_infinite_vertices_per_core", po::value<float>(), "");
 
       po::store(po::parse_config_file<char>(filename, desc), m_variables_map);
-      po::notify(m_variables_map); 
+      po::notify(m_variables_map);
     }
     catch (std::exception &e)
     {
       std::cerr << "Config file error: " << e.what() << std::endl;
       return false;
     }
-    m_loaded = true;
+
+    locking_grid_num_cells_per_axis = 
+      get_config_file_option_value<int>("locking_grid_num_cells_per_axis");
+    first_grid_lock_radius = 
+      get_config_file_option_value<int>("first_grid_lock_radius");
+    work_stats_grid_num_cells_per_axis = 
+      get_config_file_option_value<int>("work_stats_grid_num_cells_per_axis");
+    num_work_items_per_batch = 
+      get_config_file_option_value<int>("num_work_items_per_batch");
+    refinement_grainsize = 
+      get_config_file_option_value<int>("refinement_grainsize");
+    refinement_batch_size = 
+      get_config_file_option_value<int>("refinement_batch_size");
+    num_vertices_of_coarse_mesh_per_core = 
+      get_config_file_option_value<float>("num_vertices_of_coarse_mesh_per_core");
+    num_pseudo_infinite_vertices_per_core = 
+      get_config_file_option_value<float>("num_pseudo_infinite_vertices_per_core");
+
+    m_config_file_loaded = true;
+
     return true;
   }
 
-  void unload_file()
-  {
-    m_loaded = false;
-  }
-
   template <typename OptionType>
-  OptionType get_option_value(const char *option_name)
-  {
-    if (!m_loaded)
-      load_file(CONFIG_FILENAME);
-  
-    if (m_loaded && m_variables_map.count(option_name))
+  OptionType get_config_file_option_value(const char *option_name)
+  {  
+    if (m_variables_map.count(option_name))
       return m_variables_map[option_name].as<OptionType>();
     else
       return OptionType();
   }
 
-  bool              m_loaded;
   po::variables_map m_variables_map;
+  bool              m_config_file_loaded;
 };
 
 #endif // CGAL_MESH_3_CONCURRENT_MESHER_CONFIG_H
diff --git a/Mesh_3/include/CGAL/Mesh_3/Mesher_3.h b/Mesh_3/include/CGAL/Mesh_3/Mesher_3.h
index 6a765b3f66d..4244548e25b 100644
--- a/Mesh_3/include/CGAL/Mesh_3/Mesher_3.h
+++ b/Mesh_3/include/CGAL/Mesh_3/Mesher_3.h
@@ -180,8 +180,7 @@ Mesher_3<C3T3,MC,MD>::Mesher_3(C3T3& c3t3,
 :
 #ifdef CGAL_MESH_3_CONCURRENT_REFINEMENT
 m_lock_ds(c3t3.bbox(), // CJTODO: this is the bbox of the first N points => enlarge it?
-          Concurrent_mesher_config::get_option<int>(
-            "locking_grid_num_cells_per_axis")),
+          Concurrent_mesher_config::get().locking_grid_num_cells_per_axis),
 m_worksharing_ds(c3t3.bbox()), // CJTODO: this is the bbox of the first N points => enlarge it?
 #endif
 null_mesher_()
@@ -352,6 +351,10 @@ void
 Mesher_3<C3T3,MC,MD>::
 initialize()
 {
+#ifdef CONCURRENT_MESH_3
+  Concurrent_mesher_config::load_config_file(CONFIG_FILENAME, false);
+#endif
+
 #ifdef CGAL_MESH_3_CONCURRENT_REFINEMENT
   // we're not multi-thread, yet
   r_c3t3_.triangulation().set_lock_data_structure(0);
@@ -369,8 +372,7 @@ initialize()
   // => The coarse mesh can be used for a data-dependent space partitionning
   const int NUM_VERTICES_OF_COARSE_MESH = static_cast<int>(
     std::thread::hardware_concurrency()
-    *Concurrent_mesher_config::get_option<float>(
-      "num_vertices_of_coarse_mesh_per_core"));
+    *Concurrent_mesher_config::get().num_vertices_of_coarse_mesh_per_core);
   facets_mesher_.refine_sequentially_up_to_N_vertices(
     facets_visitor_, NUM_VERTICES_OF_COARSE_MESH);
   // Set new bounding boxes
@@ -402,8 +404,7 @@ initialize()
   Random_points_on_sphere_3<Point> random_point(radius*1.1);
   const int NUM_PSEUDO_INFINITE_VERTICES = static_cast<int>(
     std::thread::hardware_concurrency()
-    *Concurrent_mesher_config::get_option<float>(
-      "num_pseudo_infinite_vertices_per_core"));
+    *Concurrent_mesher_config::get().num_pseudo_infinite_vertices_per_core);
   for (int i = 0 ; i < NUM_PSEUDO_INFINITE_VERTICES ; ++i, ++random_point)
     r_c3t3_.triangulation().insert(*random_point);
 
diff --git a/Mesh_3/include/CGAL/Mesh_3/Worksharing_data_structures.h b/Mesh_3/include/CGAL/Mesh_3/Worksharing_data_structures.h
index b918188a2d4..5e53eeeec9e 100644
--- a/Mesh_3/include/CGAL/Mesh_3/Worksharing_data_structures.h
+++ b/Mesh_3/include/CGAL/Mesh_3/Worksharing_data_structures.h
@@ -32,6 +32,12 @@
 #include <tbb/concurrent_vector.h>
 
 #include <vector>
+#ifdef CGAL_MESH_3_TASK_SCHEDULER_SORTED_BATCHES_WITH_MULTISET
+# include <set>
+#endif
+#ifdef CGAL_MESH_3_TASK_SCHEDULER_SORTED_BATCHES_WITH_SORT
+# include <algorithm>
+#endif
 
 namespace CGAL {
 namespace Mesh_3 {
@@ -40,8 +43,11 @@ namespace Mesh_3 {
 class Dynamic_load_based_worksharing_ds;
 class Dynamic_auto_worksharing_ds;
 // Typedef
-//typedef Dynamic_load_based_worksharing_ds WorksharingDataStructureType;
+#ifdef CGAL_MESH_3_LOAD_BASED_WORKSHARING
+typedef Dynamic_load_based_worksharing_ds WorksharingDataStructureType;
+#else
 typedef Dynamic_auto_worksharing_ds WorksharingDataStructureType;
+#endif
 
 
 
@@ -262,15 +268,16 @@ public:
   virtual void run() const = 0;
   virtual void set_index(int) = 0;
   virtual int get_index() const = 0;
+  virtual bool less_than(const WorkItem &) const = 0;
 };
 
-template<typename Func>
+template<typename Func, typename Quality>
 class ConcreteWorkItem
   : public WorkItem
 {
 public:
-  ConcreteWorkItem(const Func& func)
-    : m_func(func), m_index(-1)
+  ConcreteWorkItem(const Func& func, const Quality &quality)
+    : m_func(func), m_index(-1), m_quality(quality)
   {}
   
   void run() const
@@ -289,9 +296,31 @@ public:
     return m_index;
   }
 
+  // Returns true when this item's quality compares less than other's.
+  // Only an item of the very same concrete type (same Func and Quality) is
+  // comparable; for any other WorkItem the answer is false. The pointer form
+  // of dynamic_cast turns a type mismatch into a plain branch instead of a
+  // std::bad_cast thrown (and caught) inside a comparator, and it does not
+  // need <typeinfo>.
+  bool less_than (const WorkItem &other) const
+  {
+    const ConcreteWorkItem *p_other =
+      dynamic_cast<const ConcreteWorkItem *>(&other);
+    return p_other != 0 && m_quality < p_other->m_quality;
+  }
+
 private:
-  Func  m_func;
-  int   m_index; // CJTODO: USELESS?
+  Func      m_func;
+  int       m_index; // CJTODO: USELESS?
+  Quality   m_quality;
+};
+
+struct CompareTwoWorkItems
+{
+  bool operator()(const WorkItem *p1, const WorkItem *p2) const
+  {
+    return p1->less_than(*p2);
+  }
 };
 
 
@@ -305,18 +334,29 @@ class WorkBatch
 {
 public:
 
+#ifdef CGAL_MESH_3_TASK_SCHEDULER_SORTED_BATCHES_WITH_MULTISET
+  typedef std::multiset<const WorkItem *, CompareTwoWorkItems> Batch;
+#else
   typedef std::vector<const WorkItem *> Batch;
-  typedef Batch::const_iterator         BatchConstIterator;
+#endif
+  typedef Batch::const_iterator BatchConstIterator;
 
   WorkBatch() {}
 
   void add_work_item(const WorkItem *p_item)
   {
+#ifdef CGAL_MESH_3_TASK_SCHEDULER_SORTED_BATCHES_WITH_MULTISET
+    m_batch.insert(p_item);
+#else
     m_batch.push_back(p_item);
+#endif
   }
 
-  void run() const
+  void run()
   {
+#ifdef CGAL_MESH_3_TASK_SCHEDULER_SORTED_BATCHES_WITH_SORT
+    std::sort(m_batch.begin(), m_batch.end(), CompareTwoWorkItems());
+#endif
     BatchConstIterator it = m_batch.begin();
     BatchConstIterator it_end = m_batch.end();
     for ( ; it != it_end ; ++it)
@@ -368,12 +408,12 @@ class Dynamic_load_based_worksharing_ds
 public:
   // Constructors
   Dynamic_load_based_worksharing_ds(const Bbox_3 &bbox)
-    : m_num_cells_per_axis(Concurrent_mesher_config::get_option<int>(
-                                  "work_stats_grid_num_cells_per_axis")),
+    : m_num_cells_per_axis(
+        Concurrent_mesher_config::get().work_stats_grid_num_cells_per_axis),
       m_stats(bbox, m_num_cells_per_axis),
       m_num_cells(m_num_cells_per_axis*m_num_cells_per_axis*m_num_cells_per_axis),
       NUM_WORK_ITEMS_PER_BATCH(
-        Concurrent_mesher_config::get_option<int>("num_work_items_per_batch"))
+        Concurrent_mesher_config::get().num_work_items_per_batch)
   {
     m_tls_work_buffers = new TLS_WorkBuffer[m_num_cells];
     m_work_batches = new tbb::concurrent_queue<WorkBatch>[m_num_cells];
@@ -398,10 +438,10 @@ public:
     m_stats.set_bbox(bbox);
   }
 
-  template <typename P3, typename Func>
-  void enqueue_work(Func f, tbb::task &parent_task, const P3 &point)
+  template <typename P3, typename Func, typename Quality>
+  void enqueue_work(Func f, const Quality &quality, tbb::task &parent_task, const P3 &point)
   {
-    WorkItem *p_item = new ConcreteWorkItem<Func>(f);
+    WorkItem *p_item = new ConcreteWorkItem<Func, Quality>(f, quality);
     int index = m_stats.compute_index(point);
     p_item->set_index(index);
     WorkBatch &wb = m_tls_work_buffers[index].local();
@@ -593,7 +633,7 @@ public:
   // Constructors
   Dynamic_auto_worksharing_ds(const Bbox_3 &bbox)
     : NUM_WORK_ITEMS_PER_BATCH(
-        Concurrent_mesher_config::get_option<int>("num_work_items_per_batch"))
+        Concurrent_mesher_config::get().num_work_items_per_batch)
   {
     set_bbox(bbox);
   }
@@ -608,10 +648,10 @@ public:
     // We don't need it.
   }
 
-  template <typename P3, typename Func>
-  void enqueue_work(Func f, tbb::task &parent_task, const P3 &point)
+  template <typename P3, typename Func, typename Quality>
+  void enqueue_work(Func f, const Quality &quality, tbb::task &parent_task, const P3 &point)
   {
-    WorkItem *p_item = new ConcreteWorkItem<Func>(f);
+    WorkItem *p_item = new ConcreteWorkItem<Func, Quality>(f, quality);
     WorkBatch &workbuffer = m_tls_work_buffers.local();
     workbuffer.add_work_item(p_item);
     if (workbuffer.size() >= NUM_WORK_ITEMS_PER_BATCH)