libstdc++
partition.h
Go to the documentation of this file.
1 // -*- C++ -*-
2 
3 // Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the terms
7 // of the GNU General Public License as published by the Free Software
8 // Foundation; either version 3, or (at your option) any later
9 // version.
10 
11 // This library is distributed in the hope that it will be useful, but
12 // WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 // General Public License for more details.
15 
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19 
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
24 
25 /** @file parallel/partition.h
26  * @brief Parallel implementation of std::partition(),
27  * std::nth_element(), and std::partial_sort().
28  * This file is a GNU parallel extension to the Standard C++ Library.
29  */
30 
31 // Written by Johannes Singler and Felix Putze.
32 
33 #ifndef _GLIBCXX_PARALLEL_PARTITION_H
34 #define _GLIBCXX_PARALLEL_PARTITION_H 1
35 
37 #include <parallel/sort.h>
38 #include <parallel/random_number.h>
39 #include <bits/stl_algo.h>
40 #include <parallel/parallel.h>
41 
42 /** @brief Decide whether to declare certain variables volatile. */
43 #define _GLIBCXX_VOLATILE volatile
44 
45 namespace __gnu_parallel
46 {
47 /** @brief Parallel implementation of std::partition.
48  * @param begin Begin iterator of input sequence to split.
49  * @param end End iterator of input sequence to split.
50  * @param pred Partition predicate, possibly including some kind of pivot.
51  * @param num_threads Maximum number of threads to use for this task.
52  * @return Number of elements not fulfilling the predicate. */
53 template<typename RandomAccessIterator, typename Predicate>
54  typename std::iterator_traits<RandomAccessIterator>::difference_type
55  parallel_partition(RandomAccessIterator begin, RandomAccessIterator end,
56  Predicate pred, thread_index_t num_threads)
57  {
59  typedef typename traits_type::value_type value_type;
60  typedef typename traits_type::difference_type difference_type;
61 
62  difference_type n = end - begin;
63 
64  _GLIBCXX_CALL(n)
65 
66  const _Settings& __s = _Settings::get();
67 
68  // Shared.
69  _GLIBCXX_VOLATILE difference_type left = 0, right = n - 1;
70  _GLIBCXX_VOLATILE difference_type leftover_left, leftover_right;
71  _GLIBCXX_VOLATILE difference_type leftnew, rightnew;
72 
73  bool* reserved_left = NULL, * reserved_right = NULL;
74 
75  difference_type chunk_size = __s.partition_chunk_size;
76 
77  omp_lock_t result_lock;
78  omp_init_lock(&result_lock);
79 
80  //at least two chunks per thread
81  if(right - left + 1 >= 2 * num_threads * chunk_size)
82 # pragma omp parallel num_threads(num_threads)
83  {
84 # pragma omp single
85  {
86  num_threads = omp_get_num_threads();
87  reserved_left = new bool[num_threads];
88  reserved_right = new bool[num_threads];
89 
90  if (__s.partition_chunk_share > 0.0)
91  chunk_size = std::max<difference_type>(__s.partition_chunk_size,
92  (double)n * __s.partition_chunk_share
93  / (double)num_threads);
94  else
95  chunk_size = __s.partition_chunk_size;
96  }
97 
98  while (right - left + 1 >= 2 * num_threads * chunk_size)
99  {
100 # pragma omp single
101  {
102  difference_type num_chunks = (right - left + 1) / chunk_size;
103 
104  for (int r = 0; r < num_threads; ++r)
105  {
106  reserved_left[r] = false;
107  reserved_right[r] = false;
108  }
109  leftover_left = 0;
110  leftover_right = 0;
111  } //implicit barrier
112 
113  // Private.
114  difference_type thread_left, thread_left_border,
115  thread_right, thread_right_border;
116  thread_left = left + 1;
117 
118  // Just to satisfy the condition below.
119  thread_left_border = thread_left - 1;
120  thread_right = n - 1;
121  thread_right_border = thread_right + 1;
122 
123  bool iam_finished = false;
124  while (!iam_finished)
125  {
126  if (thread_left > thread_left_border)
127  {
128  omp_set_lock(&result_lock);
129  if (left + (chunk_size - 1) > right)
130  iam_finished = true;
131  else
132  {
133  thread_left = left;
134  thread_left_border = left + (chunk_size - 1);
135  left += chunk_size;
136  }
137  omp_unset_lock(&result_lock);
138  }
139 
140  if (thread_right < thread_right_border)
141  {
142  omp_set_lock(&result_lock);
143  if (left > right - (chunk_size - 1))
144  iam_finished = true;
145  else
146  {
147  thread_right = right;
148  thread_right_border = right - (chunk_size - 1);
149  right -= chunk_size;
150  }
151  omp_unset_lock(&result_lock);
152  }
153 
154  if (iam_finished)
155  break;
156 
157  // Swap as usual.
158  while (thread_left < thread_right)
159  {
160  while (pred(begin[thread_left])
161  && thread_left <= thread_left_border)
162  ++thread_left;
163  while (!pred(begin[thread_right])
164  && thread_right >= thread_right_border)
165  --thread_right;
166 
167  if (thread_left > thread_left_border
168  || thread_right < thread_right_border)
169  // Fetch new chunk(s).
170  break;
171 
172  std::swap(begin[thread_left], begin[thread_right]);
173  ++thread_left;
174  --thread_right;
175  }
176  }
177 
178  // Now swap the leftover chunks to the right places.
179  if (thread_left <= thread_left_border)
180 # pragma omp atomic
181  ++leftover_left;
182  if (thread_right >= thread_right_border)
183 # pragma omp atomic
184  ++leftover_right;
185 
186 # pragma omp barrier
187 
188 # pragma omp single
189  {
190  leftnew = left - leftover_left * chunk_size;
191  rightnew = right + leftover_right * chunk_size;
192  }
193 
194 # pragma omp barrier
195 
196  // <=> thread_left_border + (chunk_size - 1) >= leftnew
197  if (thread_left <= thread_left_border
198  && thread_left_border >= leftnew)
199  {
200  // Chunk already in place, reserve spot.
201  reserved_left[(left - (thread_left_border + 1)) / chunk_size]
202  = true;
203  }
204 
205  // <=> thread_right_border - (chunk_size - 1) <= rightnew
206  if (thread_right >= thread_right_border
207  && thread_right_border <= rightnew)
208  {
209  // Chunk already in place, reserve spot.
210  reserved_right[((thread_right_border - 1) - right)
211  / chunk_size] = true;
212  }
213 
214 # pragma omp barrier
215 
216  if (thread_left <= thread_left_border
217  && thread_left_border < leftnew)
218  {
219  // Find spot and swap.
220  difference_type swapstart = -1;
221  omp_set_lock(&result_lock);
222  for (int r = 0; r < leftover_left; ++r)
223  if (!reserved_left[r])
224  {
225  reserved_left[r] = true;
226  swapstart = left - (r + 1) * chunk_size;
227  break;
228  }
229  omp_unset_lock(&result_lock);
230 
231 #if _GLIBCXX_ASSERTIONS
232  _GLIBCXX_PARALLEL_ASSERT(swapstart != -1);
233 #endif
234 
235  std::swap_ranges(begin + thread_left_border
236  - (chunk_size - 1),
237  begin + thread_left_border + 1,
238  begin + swapstart);
239  }
240 
241  if (thread_right >= thread_right_border
242  && thread_right_border > rightnew)
243  {
244  // Find spot and swap
245  difference_type swapstart = -1;
246  omp_set_lock(&result_lock);
247  for (int r = 0; r < leftover_right; ++r)
248  if (!reserved_right[r])
249  {
250  reserved_right[r] = true;
251  swapstart = right + r * chunk_size + 1;
252  break;
253  }
254  omp_unset_lock(&result_lock);
255 
256 #if _GLIBCXX_ASSERTIONS
257  _GLIBCXX_PARALLEL_ASSERT(swapstart != -1);
258 #endif
259 
260  std::swap_ranges(begin + thread_right_border,
261  begin + thread_right_border + chunk_size,
262  begin + swapstart);
263  }
264 #if _GLIBCXX_ASSERTIONS
265 # pragma omp barrier
266 
267 # pragma omp single
268  {
269  for (int r = 0; r < leftover_left; ++r)
270  _GLIBCXX_PARALLEL_ASSERT(reserved_left[r]);
271  for (int r = 0; r < leftover_right; ++r)
272  _GLIBCXX_PARALLEL_ASSERT(reserved_right[r]);
273  }
274 
275 # pragma omp barrier
276 #endif
277 
278 # pragma omp barrier
279 
280  left = leftnew;
281  right = rightnew;
282  }
283 # pragma omp flush(left, right)
284  } // end "recursion" //parallel
285 
286  difference_type final_left = left, final_right = right;
287 
288  while (final_left < final_right)
289  {
290  // Go right until key is geq than pivot.
291  while (pred(begin[final_left]) && final_left < final_right)
292  ++final_left;
293 
294  // Go left until key is less than pivot.
295  while (!pred(begin[final_right]) && final_left < final_right)
296  --final_right;
297 
298  if (final_left == final_right)
299  break;
300  std::swap(begin[final_left], begin[final_right]);
301  ++final_left;
302  --final_right;
303  }
304 
305  // All elements on the left side are < piv, all elements on the
306  // right are >= piv
307  delete[] reserved_left;
308  delete[] reserved_right;
309 
310  omp_destroy_lock(&result_lock);
311 
312  // Element "between" final_left and final_right might not have
313  // been regarded yet
314  if (final_left < n && !pred(begin[final_left]))
315  // Really swapped.
316  return final_left;
317  else
318  return final_left + 1;
319  }
320 
321 /**
322  * @brief Parallel implementation of std::nth_element().
323  * @param begin Begin iterator of input sequence.
324  * @param nth Iterator of element that must be in position afterwards.
325  * @param end End iterator of input sequence.
326  * @param comp Comparator.
327  */
328 template<typename RandomAccessIterator, typename Comparator>
329  void
330  parallel_nth_element(RandomAccessIterator begin, RandomAccessIterator nth,
331  RandomAccessIterator end, Comparator comp)
332  {
333  typedef std::iterator_traits<RandomAccessIterator> traits_type;
334  typedef typename traits_type::value_type value_type;
335  typedef typename traits_type::difference_type difference_type;
336 
337  _GLIBCXX_CALL(end - begin)
338 
339  RandomAccessIterator split;
340  random_number rng;
341 
342  const _Settings& __s = _Settings::get();
343  difference_type minimum_length = std::max<difference_type>(2,
345 
346  // Break if input range to small.
347  while (static_cast<sequence_index_t>(end - begin) >= minimum_length)
348  {
349  difference_type n = end - begin;
350 
351  RandomAccessIterator pivot_pos = begin + rng(n);
352 
353  // Swap pivot_pos value to end.
354  if (pivot_pos != (end - 1))
355  std::swap(*pivot_pos, *(end - 1));
356  pivot_pos = end - 1;
357 
358  // XXX Comparator must have first_value_type, second_value_type,
359  // result_type
360  // Comparator == __gnu_parallel::lexicographic<S, int,
361  // __gnu_parallel::less<S, S> >
362  // pivot_pos == std::pair<S, int>*
363  // XXX binder2nd only for RandomAccessIterators??
365  pred(comp, *pivot_pos);
366 
367  // Divide, leave pivot unchanged in last place.
368  RandomAccessIterator split_pos1, split_pos2;
369  split_pos1 = begin + parallel_partition(begin, end - 1, pred,
370  get_max_threads());
371 
372  // Left side: < pivot_pos; right side: >= pivot_pos
373 
374  // Swap pivot back to middle.
375  if (split_pos1 != pivot_pos)
376  std::swap(*split_pos1, *pivot_pos);
377  pivot_pos = split_pos1;
378 
379  // In case all elements are equal, split_pos1 == 0
380  if ((split_pos1 + 1 - begin) < (n >> 7)
381  || (end - split_pos1) < (n >> 7))
382  {
383  // Very unequal split, one part smaller than one 128th
384  // elements not strictly larger than the pivot.
387  pred(__gnu_parallel::binder1st<Comparator, value_type,
388  value_type, bool>(comp, *pivot_pos));
389 
390  // Find other end of pivot-equal range.
391  split_pos2 = __gnu_sequential::partition(split_pos1 + 1,
392  end, pred);
393  }
394  else
395  // Only skip the pivot.
396  split_pos2 = split_pos1 + 1;
397 
398  // Compare iterators.
399  if (split_pos2 <= nth)
400  begin = split_pos2;
401  else if (nth < split_pos1)
402  end = split_pos1;
403  else
404  break;
405  }
406 
407  // Only at most _Settings::partition_minimal_n elements left.
408  __gnu_sequential::nth_element(begin, nth, end, comp);
409  }
410 
411 /** @brief Parallel implementation of std::partial_sort().
412 * @param begin Begin iterator of input sequence.
413 * @param middle Sort until this position.
414 * @param end End iterator of input sequence.
415 * @param comp Comparator. */
416 template<typename RandomAccessIterator, typename Comparator>
417  void
418  parallel_partial_sort(RandomAccessIterator begin,
419  RandomAccessIterator middle,
420  RandomAccessIterator end, Comparator comp)
421  {
422  parallel_nth_element(begin, middle, end, comp);
423  std::sort(begin, middle, comp);
424  }
425 
426 } //namespace __gnu_parallel
427 
428 #undef _GLIBCXX_VOLATILE
429 
430 #endif /* _GLIBCXX_PARALLEL_PARTITION_H */