libstdc++
multiway_mergesort.h
Go to the documentation of this file.
1 // -*- C++ -*-
2 
3 // Copyright (C) 2007-2014 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the terms
7 // of the GNU General Public License as published by the Free Software
8 // Foundation; either version 3, or (at your option) any later
9 // version.
10 
11 // This library is distributed in the hope that it will be useful, but
12 // WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 // General Public License for more details.
15 
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19 
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
24 
25 /** @file parallel/multiway_mergesort.h
26  * @brief Parallel multiway merge sort.
27  * This file is a GNU parallel extension to the Standard C++ Library.
28  */
29 
30 // Written by Johannes Singler.
31 
32 #ifndef _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H
33 #define _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H 1
34 
35 #include <vector>
36 
38 #include <bits/stl_algo.h>
39 #include <parallel/parallel.h>
41 
42 namespace __gnu_parallel
43 {
44  /** @brief Subsequence description. */
45  template<typename _DifferenceTp>
46  struct _Piece
47  {
48  typedef _DifferenceTp _DifferenceType;
49 
50  /** @brief Begin of subsequence. */
51  _DifferenceType _M_begin;
52 
53  /** @brief End of subsequence. */
54  _DifferenceType _M_end;
55  };
56 
57  /** @brief Data accessed by all threads.
58  *
59  * PMWMS = parallel multiway mergesort */
60  template<typename _RAIter>
62  {
63  typedef std::iterator_traits<_RAIter> _TraitsType;
64  typedef typename _TraitsType::value_type _ValueType;
65  typedef typename _TraitsType::difference_type _DifferenceType;
66 
67  /** @brief Number of threads involved. */
69 
70  /** @brief Input __begin. */
71  _RAIter _M_source;
72 
73  /** @brief Start indices, per thread. */
74  _DifferenceType* _M_starts;
75 
76  /** @brief Storage in which to sort. */
77  _ValueType** _M_temporary;
78 
79  /** @brief Samples. */
80  _ValueType* _M_samples;
81 
82  /** @brief Offsets to add to the found positions. */
83  _DifferenceType* _M_offsets;
84 
85  /** @brief Pieces of data to merge @c [thread][__sequence] */
87  };
88 
89  /**
90  * @brief Select _M_samples from a sequence.
91  * @param __sd Pointer to algorithm data. _Result will be placed in
92  * @c __sd->_M_samples.
93  * @param __num_samples Number of _M_samples to select.
94  */
95  template<typename _RAIter, typename _DifferenceTp>
96  void
98  _DifferenceTp __num_samples)
99  {
100  typedef std::iterator_traits<_RAIter> _TraitsType;
101  typedef typename _TraitsType::value_type _ValueType;
102  typedef _DifferenceTp _DifferenceType;
103 
104  _ThreadIndex __iam = omp_get_thread_num();
105 
106  _DifferenceType* __es = new _DifferenceType[__num_samples + 2];
107 
108  __equally_split(__sd->_M_starts[__iam + 1] - __sd->_M_starts[__iam],
109  __num_samples + 1, __es);
110 
111  for (_DifferenceType __i = 0; __i < __num_samples; ++__i)
112  ::new(&(__sd->_M_samples[__iam * __num_samples + __i]))
113  _ValueType(__sd->_M_source[__sd->_M_starts[__iam]
114  + __es[__i + 1]]);
115 
116  delete[] __es;
117  }
118 
119  /** @brief Split consistently. */
120  template<bool __exact, typename _RAIter,
121  typename _Compare, typename _SortingPlacesIterator>
123  { };
124 
125  /** @brief Split by exact splitting. */
126  template<typename _RAIter, typename _Compare,
127  typename _SortingPlacesIterator>
128  struct _SplitConsistently<true, _RAIter, _Compare, _SortingPlacesIterator>
129  {
130  void
131  operator()(const _ThreadIndex __iam,
133  _Compare& __comp,
134  const typename
135  std::iterator_traits<_RAIter>::difference_type
136  __num_samples) const
137  {
138 # pragma omp barrier
139 
140  std::vector<std::pair<_SortingPlacesIterator,
141  _SortingPlacesIterator> >
142  __seqs(__sd->_M_num_threads);
143  for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; __s++)
144  __seqs[__s] = std::make_pair(__sd->_M_temporary[__s],
145  __sd->_M_temporary[__s]
146  + (__sd->_M_starts[__s + 1]
147  - __sd->_M_starts[__s]));
148 
150 
151  // if not last thread
152  if (__iam < __sd->_M_num_threads - 1)
153  multiseq_partition(__seqs.begin(), __seqs.end(),
154  __sd->_M_starts[__iam + 1], __offsets.begin(),
155  __comp);
156 
157  for (_ThreadIndex __seq = 0; __seq < __sd->_M_num_threads; __seq++)
158  {
159  // for each sequence
160  if (__iam < (__sd->_M_num_threads - 1))
161  __sd->_M_pieces[__iam][__seq]._M_end
162  = __offsets[__seq] - __seqs[__seq].first;
163  else
164  // very end of this sequence
165  __sd->_M_pieces[__iam][__seq]._M_end =
166  __sd->_M_starts[__seq + 1] - __sd->_M_starts[__seq];
167  }
168 
169 # pragma omp barrier
170 
171  for (_ThreadIndex __seq = 0; __seq < __sd->_M_num_threads; __seq++)
172  {
173  // For each sequence.
174  if (__iam > 0)
175  __sd->_M_pieces[__iam][__seq]._M_begin =
176  __sd->_M_pieces[__iam - 1][__seq]._M_end;
177  else
178  // Absolute beginning.
179  __sd->_M_pieces[__iam][__seq]._M_begin = 0;
180  }
181  }
182  };
183 
184  /** @brief Split by sampling. */
185  template<typename _RAIter, typename _Compare,
186  typename _SortingPlacesIterator>
187  struct _SplitConsistently<false, _RAIter, _Compare, _SortingPlacesIterator>
188  {
189  void
190  operator()(const _ThreadIndex __iam,
192  _Compare& __comp,
193  const typename
194  std::iterator_traits<_RAIter>::difference_type
195  __num_samples) const
196  {
197  typedef std::iterator_traits<_RAIter> _TraitsType;
198  typedef typename _TraitsType::value_type _ValueType;
199  typedef typename _TraitsType::difference_type _DifferenceType;
200 
201  __determine_samples(__sd, __num_samples);
202 
203 # pragma omp barrier
204 
205 # pragma omp single
206  __gnu_sequential::sort(__sd->_M_samples,
207  __sd->_M_samples
208  + (__num_samples * __sd->_M_num_threads),
209  __comp);
210 
211 # pragma omp barrier
212 
213  for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; ++__s)
214  {
215  // For each sequence.
216  if (__num_samples * __iam > 0)
217  __sd->_M_pieces[__iam][__s]._M_begin =
218  std::lower_bound(__sd->_M_temporary[__s],
219  __sd->_M_temporary[__s]
220  + (__sd->_M_starts[__s + 1]
221  - __sd->_M_starts[__s]),
222  __sd->_M_samples[__num_samples * __iam],
223  __comp)
224  - __sd->_M_temporary[__s];
225  else
226  // Absolute beginning.
227  __sd->_M_pieces[__iam][__s]._M_begin = 0;
228 
229  if ((__num_samples * (__iam + 1)) <
230  (__num_samples * __sd->_M_num_threads))
231  __sd->_M_pieces[__iam][__s]._M_end =
232  std::lower_bound(__sd->_M_temporary[__s],
233  __sd->_M_temporary[__s]
234  + (__sd->_M_starts[__s + 1]
235  - __sd->_M_starts[__s]),
236  __sd->_M_samples[__num_samples * (__iam + 1)],
237  __comp)
238  - __sd->_M_temporary[__s];
239  else
240  // Absolute end.
241  __sd->_M_pieces[__iam][__s]._M_end = (__sd->_M_starts[__s + 1]
242  - __sd->_M_starts[__s]);
243  }
244  }
245  };
246 
247  template<bool __stable, typename _RAIter, typename _Compare>
248  struct __possibly_stable_sort
249  { };
250 
251  template<typename _RAIter, typename _Compare>
252  struct __possibly_stable_sort<true, _RAIter, _Compare>
253  {
254  void operator()(const _RAIter& __begin,
255  const _RAIter& __end, _Compare& __comp) const
256  { __gnu_sequential::stable_sort(__begin, __end, __comp); }
257  };
258 
259  template<typename _RAIter, typename _Compare>
260  struct __possibly_stable_sort<false, _RAIter, _Compare>
261  {
262  void operator()(const _RAIter __begin,
263  const _RAIter __end, _Compare& __comp) const
264  { __gnu_sequential::sort(__begin, __end, __comp); }
265  };
266 
267  template<bool __stable, typename Seq_RAIter,
268  typename _RAIter, typename _Compare,
269  typename DiffType>
270  struct __possibly_stable_multiway_merge
271  { };
272 
273  template<typename Seq_RAIter, typename _RAIter,
274  typename _Compare, typename _DiffType>
275  struct __possibly_stable_multiway_merge<true, Seq_RAIter,
276  _RAIter, _Compare, _DiffType>
277  {
278  void operator()(const Seq_RAIter& __seqs_begin,
279  const Seq_RAIter& __seqs_end,
280  const _RAIter& __target,
281  _Compare& __comp,
282  _DiffType __length_am) const
283  { stable_multiway_merge(__seqs_begin, __seqs_end, __target,
284  __length_am, __comp, sequential_tag()); }
285  };
286 
287  template<typename Seq_RAIter, typename _RAIter,
288  typename _Compare, typename _DiffType>
289  struct __possibly_stable_multiway_merge<false, Seq_RAIter,
290  _RAIter, _Compare, _DiffType>
291  {
292  void operator()(const Seq_RAIter& __seqs_begin,
293  const Seq_RAIter& __seqs_end,
294  const _RAIter& __target,
295  _Compare& __comp,
296  _DiffType __length_am) const
297  { multiway_merge(__seqs_begin, __seqs_end, __target, __length_am,
298  __comp, sequential_tag()); }
299  };
300 
301  /** @brief PMWMS code executed by each thread.
302  * @param __sd Pointer to algorithm data.
303  * @param __comp Comparator.
304  */
305  template<bool __stable, bool __exact, typename _RAIter,
306  typename _Compare>
307  void
309  _Compare& __comp)
310  {
311  typedef std::iterator_traits<_RAIter> _TraitsType;
312  typedef typename _TraitsType::value_type _ValueType;
313  typedef typename _TraitsType::difference_type _DifferenceType;
314 
315  _ThreadIndex __iam = omp_get_thread_num();
316 
317  // Length of this thread's chunk, before merging.
318  _DifferenceType __length_local =
319  __sd->_M_starts[__iam + 1] - __sd->_M_starts[__iam];
320 
321  // Sort in temporary storage, leave space for sentinel.
322 
323  typedef _ValueType* _SortingPlacesIterator;
324 
325  __sd->_M_temporary[__iam] =
326  static_cast<_ValueType*>(::operator new(sizeof(_ValueType)
327  * (__length_local + 1)));
328 
329  // Copy there.
330  std::uninitialized_copy(__sd->_M_source + __sd->_M_starts[__iam],
331  __sd->_M_source + __sd->_M_starts[__iam]
332  + __length_local,
333  __sd->_M_temporary[__iam]);
334 
335  __possibly_stable_sort<__stable, _SortingPlacesIterator, _Compare>()
336  (__sd->_M_temporary[__iam],
337  __sd->_M_temporary[__iam] + __length_local,
338  __comp);
339 
340  // Invariant: locally sorted subsequence in sd->_M_temporary[__iam],
341  // __sd->_M_temporary[__iam] + __length_local.
342 
343  // No barrier here: Synchronization is done by the splitting routine.
344 
345  _DifferenceType __num_samples =
348  (__iam, __sd, __comp, __num_samples);
349 
350  // Offset from __target __begin, __length after merging.
351  _DifferenceType __offset = 0, __length_am = 0;
352  for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; __s++)
353  {
354  __length_am += (__sd->_M_pieces[__iam][__s]._M_end
355  - __sd->_M_pieces[__iam][__s]._M_begin);
356  __offset += __sd->_M_pieces[__iam][__s]._M_begin;
357  }
358 
359  typedef std::vector<
361  _SeqVector;
362  _SeqVector __seqs(__sd->_M_num_threads);
363 
364  for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; ++__s)
365  {
366  __seqs[__s] =
367  std::make_pair(__sd->_M_temporary[__s]
368  + __sd->_M_pieces[__iam][__s]._M_begin,
369  __sd->_M_temporary[__s]
370  + __sd->_M_pieces[__iam][__s]._M_end);
371  }
372 
373  __possibly_stable_multiway_merge<
374  __stable, typename _SeqVector::iterator,
375  _RAIter, _Compare, _DifferenceType>()(__seqs.begin(), __seqs.end(),
376  __sd->_M_source + __offset, __comp,
377  __length_am);
378 
379 # pragma omp barrier
380 
381  for (_DifferenceType __i = 0; __i < __length_local; ++__i)
382  __sd->_M_temporary[__iam][__i].~_ValueType();
383  ::operator delete(__sd->_M_temporary[__iam]);
384  }
385 
386  /** @brief PMWMS main call.
387  * @param __begin Begin iterator of sequence.
388  * @param __end End iterator of sequence.
389  * @param __comp Comparator.
390  * @param __num_threads Number of threads to use.
391  */
392  template<bool __stable, bool __exact, typename _RAIter,
393  typename _Compare>
394  void
395  parallel_sort_mwms(_RAIter __begin, _RAIter __end,
396  _Compare __comp,
397  _ThreadIndex __num_threads)
398  {
399  _GLIBCXX_CALL(__end - __begin)
400 
401  typedef std::iterator_traits<_RAIter> _TraitsType;
402  typedef typename _TraitsType::value_type _ValueType;
403  typedef typename _TraitsType::difference_type _DifferenceType;
404 
405  _DifferenceType __n = __end - __begin;
406 
407  if (__n <= 1)
408  return;
409 
410  // at least one element per thread
411  if (__num_threads > __n)
412  __num_threads = static_cast<_ThreadIndex>(__n);
413 
414  // shared variables
416  _DifferenceType* __starts;
417  _DifferenceType __size;
418 
419 # pragma omp parallel num_threads(__num_threads)
420  {
421  __num_threads = omp_get_num_threads(); //no more threads than requested
422 
423 # pragma omp single
424  {
425  __sd._M_num_threads = __num_threads;
426  __sd._M_source = __begin;
427 
428  __sd._M_temporary = new _ValueType*[__num_threads];
429 
430  if (!__exact)
431  {
432  __size =
433  (_Settings::get().sort_mwms_oversampling * __num_threads - 1)
434  * __num_threads;
435  __sd._M_samples = static_cast<_ValueType*>
436  (::operator new(__size * sizeof(_ValueType)));
437  }
438  else
439  __sd._M_samples = 0;
440 
441  __sd._M_offsets = new _DifferenceType[__num_threads - 1];
442  __sd._M_pieces
443  = new std::vector<_Piece<_DifferenceType> >[__num_threads];
444  for (_ThreadIndex __s = 0; __s < __num_threads; ++__s)
445  __sd._M_pieces[__s].resize(__num_threads);
446  __starts = __sd._M_starts = new _DifferenceType[__num_threads + 1];
447 
448  _DifferenceType __chunk_length = __n / __num_threads;
449  _DifferenceType __split = __n % __num_threads;
450  _DifferenceType __pos = 0;
451  for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
452  {
453  __starts[__i] = __pos;
454  __pos += ((__i < __split)
455  ? (__chunk_length + 1) : __chunk_length);
456  }
457  __starts[__num_threads] = __pos;
458  } //single
459 
460  // Now sort in parallel.
461  parallel_sort_mwms_pu<__stable, __exact>(&__sd, __comp);
462  } //parallel
463 
464  delete[] __starts;
465  delete[] __sd._M_temporary;
466 
467  if (!__exact)
468  {
469  for (_DifferenceType __i = 0; __i < __size; ++__i)
470  __sd._M_samples[__i].~_ValueType();
471  ::operator delete(__sd._M_samples);
472  }
473 
474  delete[] __sd._M_offsets;
475  delete[] __sd._M_pieces;
476  }
477 
478 } //namespace __gnu_parallel
479 
480 #endif /* _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H */
End-user include file. Provides advanced settings and tuning options. This file is a GNU parallel ext...
void parallel_sort_mwms(_RAIter __begin, _RAIter __end, _Compare __comp, _ThreadIndex __num_threads)
PMWMS main call.
constexpr pair< typename __decay_and_strip< _T1 >::__type, typename __decay_and_strip< _T2 >::__type > make_pair(_T1 &&__x, _T2 &&__y)
A convenience wrapper for creating a pair from two objects.
Definition: stl_pair.h:276
unsigned int sort_mwms_oversampling
Oversampling factor for parallel std::sort (MWMS).
Definition: settings.h:234
_OutputIterator __equally_split(_DifferenceType __n, _ThreadIndex __num_threads, _OutputIterator __s)
function to split a sequence into parts of almost equal size.
Definition: equally_split.h:48
Includes the original header files concerned with iterators except for stream iterators. This file is a GNU parallel extension to the Standard C++ Library.
_RAIterOut multiway_merge(_RAIterPairIterator __seqs_begin, _RAIterPairIterator __seqs_end, _RAIterOut __target, _DifferenceTp __length, _Compare __comp, __gnu_parallel::sequential_tag)
Multiway Merge Frontend.
uint16_t _ThreadIndex
Unsigned integer to index a thread number. The maximum thread number (for each processor) must fit in...
Definition: types.h:123
void multiseq_partition(_RanSeqs __begin_seqs, _RanSeqs __end_seqs, _RankType __rank, _RankIterator __begin_offsets, _Compare __comp=std::less< typename std::iterator_traits< typename std::iterator_traits< _RanSeqs >::value_type::first_type >::value_type >())
Splits several sorted sequences at a certain global __rank, resulting in a splitting point for each s...
_ForwardIterator uninitialized_copy(_InputIterator __first, _InputIterator __last, _ForwardIterator __result)
Copies the range [first,last) into result.
Subsequence description.
iterator begin() noexcept
Definition: stl_vector.h:547
_ThreadIndex _M_num_threads
Number of threads involved.
void __determine_samples(_PMWMSSortingData< _RAIter > *__sd, _DifferenceTp __num_samples)
Select _M_samples from a sequence.
GNU parallel code for public use.
static const _Settings & get()
Get the global settings.
_DifferenceType * _M_starts
Start indices, per thread.
Implementation of sequential and parallel multiway merge.
_ValueType ** _M_temporary
Storage in which to sort.
_DifferenceType _M_end
End of subsequence.
std::vector< _Piece< _DifferenceType > > * _M_pieces
Pieces of data to merge [thread][__sequence].
#define _GLIBCXX_CALL(__n)
Macro to produce log message when entering a function.
Struct holding two objects of arbitrary type.
Definition: stl_pair.h:96
A standard container which offers fixed time access to individual elements in any order...
Definition: stl_vector.h:214
_DifferenceType _M_begin
Begin of subsequence.
_DifferenceType * _M_offsets
Offsets to add to the found positions.
void parallel_sort_mwms_pu(_PMWMSSortingData< _RAIter > *__sd, _Compare &__comp)
PMWMS code executed by each thread.
Data accessed by all threads.