amprt.h
1 /***
2 * ==++==
3 *
4 * Copyright (c) Microsoft Corporation. All rights reserved.
5 *
6 * ==--==
7 * =+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+
8 *
9 * amprt.h
10 *
11 * Define the C++ interfaces exported by the C++ AMP runtime
12 *
13 * =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
14 ****/
15 #pragma once
16 
17 #if defined(WINAPI_FAMILY) && (WINAPI_FAMILY == WINAPI_FAMILY_PHONE_APP)
18  #error ERROR: C++ AMP runtime is not supported for applications where WINAPI_FAMILY == WINAPI_FAMILY_PHONE_APP.
19 #endif
20 
21 #if !(defined (_M_X64) || defined (_M_IX86) || defined (_M_ARM) || defined (_M_ARM64) )
22  #error ERROR: C++ AMP runtime is supported only on X64, X86, ARM, and ARM64 architectures.
23 #endif
24 
25 #if defined (_M_CEE)
26  #error ERROR: C++ AMP runtime is not supported when compiling /clr.
27 #endif
28 
29 #ifndef __cplusplus
30  #error ERROR: C++ AMP runtime is supported only for C++.
31 #endif
32 
33 #if !defined(_CXXAMP)
34 
35 #if defined(_DEBUG)
36  #pragma comment(lib, "vcampd")
37 #else // _DEBUG
38  #pragma comment(lib, "vcamp")
39 #endif // _DEBUG
40 
41 #endif // _CXXAMP
42 
43 #if !defined(_CXXAMP)
44 
45 #define __GPU restrict(amp,cpu)
46 #define __GPU_ONLY restrict(amp)
47 #define __CPU_ONLY restrict(cpu)
48 
49 #else
50 
51 #define __GPU
52 #define __GPU_ONLY
53 #define __CPU_ONLY
54 
55 #endif // _CXXAMP
56 
57 #include <unknwn.h>
58 #include <crtdbg.h>
59 #include <string>
60 #include <vector>
61 #include <iterator>
62 
63 #if defined(_CXXAMP)
64 #include <strsafe.h>
65 #endif // _CXXAMP
66 
67 #include <future>
68 #include <functional>
69 #include <map>
70 #include <unordered_map>
71 #include <set>
72 #include <unordered_set>
73 #include <concrt.h>
74 #include <type_traits>
75 
76 #include "amprt_exceptions.h"
77 
78 #if !defined(_AMPIMP)
79 #define _AMPIMP __declspec(dllimport)
80 #endif
81 
82 #pragma pack(push,8)
83 
84 // Part of runtime-compiler interface
85 extern "C"
86 {
87  // Access mode of fields
88  enum _Access_mode
89  {
90  _No_access = 0,
91  _Read_access = (1 << 0),
92  _Write_access = (1 << 1),
93  _Is_array_mode = (1 << 30),
94  _Read_write_access = _Read_access | _Write_access,
95  };
96 }
97 
98 namespace Concurrency
99 {
103  enum access_type
104  {
105  access_type_none = 0,
106  access_type_read = (1 << 0),
107  access_type_write = (1 << 1),
108  access_type_read_write = access_type_read | access_type_write,
109  access_type_auto = (1 << 31),
110  };
111 
112 // Forward declarations
113 class accelerator_view;
114 class accelerator;
115 
116 namespace details
117 {
118  const size_t ERROR_MSG_BUFFER_SIZE = 1024;
119 
120  // A reference counter to be used as the base class for all reference counted types.
121  class _Reference_counter
122  {
123  public:
124 
125  // Constructor.
126  _Reference_counter() : _M_rc(0) {}
127 
128  // Destructor.
129  virtual ~_Reference_counter() noexcept(false) {}
130 
131  // Add a reference.
132  // Thread-safe.
133  size_t _Add_reference()
134  {
135  return InterlockedIncrement(reinterpret_cast<LONG volatile*>(&_M_rc));
136  }
137 
138  // Remove a reference.
139  // Thread-safe.
140  size_t _Remove_reference()
141  {
142  _ASSERTE(_M_rc > 0);
143 
144  size_t refCount = InterlockedDecrement(reinterpret_cast<LONG volatile*>(&_M_rc));
145 
146  if (refCount == 0)
147  this->_Release();
148 
149  return refCount;
150  }
151 
152  // Release the counter
153  _AMPIMP void _Release();
154 
155  // Return the reference count value
156  size_t _Get_reference_count()
157  {
158  return _M_rc;
159  }
160 
161  private:
162  size_t _M_rc;
163  };
164 
165  // A smart pointer to a reference counted object
166  // T must be a type derived from _Reference_counter
167  template <class T>
168  class _Reference_counted_obj_ptr
169  {
170  public:
171 
172  // Constructor
173  _Reference_counted_obj_ptr(T* _Ptr = NULL) : _M_obj_ptr(_Ptr)
174  {
175  _Init();
176  }
177 
178  // Copy constructor
179  _Reference_counted_obj_ptr(const _Reference_counted_obj_ptr &_Other) : _M_obj_ptr(_Other._M_obj_ptr)
180  {
181  _Init();
182  }
183 
184  // Move constructor
185  _Reference_counted_obj_ptr(_Reference_counted_obj_ptr &&_Other) : _M_obj_ptr(_Other._M_obj_ptr)
186  {
187  _Other._M_obj_ptr = nullptr;
188  // No change to ref-count
189  }
190 
191  // Destructor
192  ~_Reference_counted_obj_ptr()
193  {
194  if (_M_obj_ptr != NULL) {
195  _UnInitialize(_M_obj_ptr);
196  }
197  }
198 
199  // Assignment operator
200  _Reference_counted_obj_ptr& operator=(const _Reference_counted_obj_ptr &_Other)
201  {
202  if (_M_obj_ptr != _Other._M_obj_ptr)
203  {
204  T *oldPtr = _M_obj_ptr;
205  _M_obj_ptr = _Other._M_obj_ptr;
206  _Init();
207 
208  if (oldPtr != NULL) {
209  _UnInitialize(oldPtr);
210  }
211  }
212  return *this;
213  }
214 
215  // Move-assignment operator
216  _Reference_counted_obj_ptr& operator=(_Reference_counted_obj_ptr &&_Other)
217  {
218  if (_M_obj_ptr != _Other._M_obj_ptr)
219  {
220  T *oldPtr = _M_obj_ptr;
221  _M_obj_ptr = _Other._M_obj_ptr;
222  _Other._M_obj_ptr = nullptr;
223  // No change to ref-count of the adopted pointer.
224 
225  if (oldPtr != nullptr)
226  {
227  _UnInitialize(oldPtr);
228  }
229  }
230  return *this;
231  }
232 
233  _Ret_ T* operator->() const
234  {
235  return _M_obj_ptr;
236  }
237 
238  T& operator*() const
239  {
240  return *_M_obj_ptr;
241  }
242 
243  operator T*() const
244  {
245  return _M_obj_ptr;
246  }
247 
248  _Ret_ T* _Get_ptr() const
249  {
250  return _M_obj_ptr;
251  }
252 
253  private:
254  T *_M_obj_ptr;
255 
256  void _Init()
257  {
258  if (_M_obj_ptr == NULL)
259  return;
260 
261  reinterpret_cast<_Reference_counter*>(_M_obj_ptr)->_Add_reference();
262  }
263 
264  static void _UnInitialize(_In_ T *_Obj_ptr)
265  {
266  reinterpret_cast<_Reference_counter*>(_Obj_ptr)->_Remove_reference();
267  }
268  };
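// Illustrative sketch (not part of the original header), assuming a hypothetical
// _My_obj type derived from _Reference_counter: _Reference_counted_obj_ptr adds a
// reference when it adopts or copies a pointer and removes it on destruction, so
// _Release() runs exactly once when the last smart pointer goes away.
#if 0
class _My_obj : public _Reference_counter
{
    // ... payload ...
};

void _Example_ref_counting()
{
    _Reference_counted_obj_ptr<_My_obj> _Ptr1(new _My_obj());  // ref count == 1
    {
        _Reference_counted_obj_ptr<_My_obj> _Ptr2 = _Ptr1;     // copy: ref count == 2
    }                                                           // _Ptr2 destroyed: ref count == 1
}                                                               // _Ptr1 destroyed: count reaches 0, _Release() is invoked
#endif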
269 
270  // Forward declarations
271  class _Trace;
272  class _Amp_runtime_trace;
273  class _Buffer;
274  class _Texture;
275  class _Sampler;
276  class _Ubiquitous_buffer;
277  class _D3D_interop;
278  class _Accelerator_view_impl;
279  class _CPU_accelerator_view_impl;
280  class _D3D_accelerator_view_impl;
281  class _Accelerator_impl;
282  class _Event_impl;
283  class _DPC_runtime_factory;
284  class _View_shape;
285  struct _Buffer_descriptor;
286  struct _Texture_descriptor;
287  struct _DPC_shader_blob;
288  struct _View_info;
289 
290  // The enum specifies the base type for short vector type.
291  enum _Short_vector_base_type_id : unsigned int
292  {
293  _Uint_type = 0,
294  _Int_type = 1,
295  _Float_type = 2,
296  _Unorm_type = 3,
297  _Norm_type = 4,
298  _Double_type = 5,
299  _Invalid_type = 0xFFFFFFFF
300  };
301 
302  typedef _Short_vector_base_type_id _Texture_base_type_id;
303 
304 } // namespace Concurrency::details
305 
305 
306  typedef details::_Reference_counted_obj_ptr<details::_Accelerator_view_impl> _Accelerator_view_impl_ptr;
307  typedef details::_Reference_counted_obj_ptr<details::_Accelerator_impl> _Accelerator_impl_ptr;
308  typedef details::_Reference_counted_obj_ptr<details::_Buffer> _Buffer_ptr;
309  typedef details::_Reference_counted_obj_ptr<details::_Texture> _Texture_ptr;
310  typedef details::_Reference_counted_obj_ptr<details::_Sampler> _Sampler_ptr;
311  typedef details::_Reference_counted_obj_ptr<details::_Ubiquitous_buffer> _Ubiquitous_buffer_ptr;
312  typedef details::_Reference_counted_obj_ptr<details::_Event_impl> _Event_impl_ptr;
313  typedef details::_View_shape * _View_shape_ptr;
314 
315 namespace details
316 {
317  // The _Event class.
318  class _Event
319  {
320  friend class _Buffer;
321  friend class _Texture;
322  friend class accelerator_view;
324 
325  public:
329  _AMPIMP _Event();
330 
334  _AMPIMP ~_Event();
335 
339  _AMPIMP _Event(const _Event & _Other);
340 
344  _AMPIMP _Event & operator=(const _Event & _Other);
345 
352  _AMPIMP bool _Is_finished_nothrow();
353 
360  _AMPIMP bool _Is_finished();
361 
365  _AMPIMP void _Get();
366 
374  _AMPIMP bool _Is_empty() const;
375 
382  _AMPIMP _Event _Add_event(_Event _Ev);
383 
390  _AMPIMP _Event _Add_continuation(const std::function<_Event __cdecl ()> &_Continuation_task);
391 
395  _AMPIMP bool operator==(const _Event &_Other) const;
396 
400  _AMPIMP bool operator!=(const _Event &_Other) const;
401 
402  private:
403 
404  // Private constructor
405  _Event(_In_ _Event_impl* _Impl);
406 
407  _Event_impl_ptr _M_ptr_event_impl;
408  };
409 
410  typedef _Buffer_descriptor *_View_key;
411 
412  _Ret_ _Accelerator_view_impl* _Get_accelerator_view_impl_ptr(const accelerator_view& _Accl_view);
413  _Ret_ _Accelerator_impl* _Get_accelerator_impl_ptr(const accelerator& _Accl);
414  _Event _Get_access_async(const _View_key _Key, accelerator_view _Av, _Access_mode _Mode, _Buffer_ptr &_Buf_ptr);
415  unsigned int _Get_mipmap_levels(const _Texture *_Tex);
416 
417  inline bool _Is_valid_access_mode(_Access_mode _Mode)
418  {
419  if ((_Mode != _Read_access) &&
420  (_Mode != _Write_access) &&
421  (_Mode != _Read_write_access))
422  {
423  return false;
424  }
425 
426  return true;
427  }
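// Minimal sketch (not part of the original header): the _Access_mode bits compose,
// so a combined read/write request passes _Is_valid_access_mode while _No_access
// and _Is_array_mode do not. _Example_access_modes is a hypothetical helper name.
#if 0
void _Example_access_modes()
{
    _Access_mode _Mode = static_cast<_Access_mode>(_Read_access | _Write_access); // == _Read_write_access
    bool _Ok = _Is_valid_access_mode(_Mode);          // true
    bool _Not_ok = _Is_valid_access_mode(_No_access); // false
    (void)_Ok; (void)_Not_ok;
}
#endif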
428 
429  // Caution: Do not change this structure definition.
430  // This struct is special and is processed by the FE to identify the buffers
431  // used in a parallel_for_each and to setup the _M_data_ptr with the appropriate
432  // buffer ptr value in the device code.
433  typedef struct _Buffer_descriptor
434  {
435  friend _Event _Get_access_async(const _View_key _Key, accelerator_view _Av, _Access_mode _Mode, _Buffer_ptr &_Buf_ptr);
436 
437  // _M_data_ptr points to the raw data underlying the buffer for accessing on host
438  mutable void *_M_data_ptr;
439 
440  private:
441  // _M_buffer_ptr points to a _Ubiquitous_buffer that holds the data in an 1D array.
442  // This is private to ensure that all assignments to this data member
443  // only happen through public functions which properly manage the
444  // ref count of the underlying buffer
445  _Ubiquitous_buffer *_M_buffer_ptr;
446 
447  public:
448  // _M_curr_cpu_access_mode specifies the current access mode of the data on the
449  // cpu accelerator_view specified at the time of registration of this view
450  _Access_mode _M_curr_cpu_access_mode;
451 
452  // _M_type_access_mode specifies the access mode of the overlay type
453  // array_views set it to the appropriate access mode and for arrays it is
454  // always _Is_array_mode.
455  _Access_mode _M_type_access_mode;
456 
457  public:
458  // Public functions
459 
460  // Default constructor
461  _Buffer_descriptor() __GPU
462  : _M_data_ptr(NULL), _M_buffer_ptr(NULL),
463  _M_curr_cpu_access_mode(_No_access), _M_type_access_mode(_Is_array_mode)
464  {
465  }
466 
467  _Buffer_descriptor(_In_ void *_Data_ptr, _In_ _Ubiquitous_buffer *_Buffer_ptr,
468  _Access_mode _Curr_cpu_access_mode, _Access_mode _Type_mode) __GPU
469  : _M_data_ptr(_Data_ptr), _M_buffer_ptr(NULL),
470  _M_curr_cpu_access_mode(_Curr_cpu_access_mode), _M_type_access_mode(_Type_mode)
471  {
472  _Set_buffer_ptr(_Buffer_ptr);
473  }
474 
475  // Destructor
476  ~_Buffer_descriptor() __GPU
477  {
478  _Set_buffer_ptr(NULL);
479  }
480 
481  // Copy constructor
482  _Buffer_descriptor(const _Buffer_descriptor &_Other) __GPU
483  : _M_data_ptr(_Other._M_data_ptr), _M_buffer_ptr(NULL),
484  _M_curr_cpu_access_mode(_Other._M_curr_cpu_access_mode), _M_type_access_mode(_Other._M_type_access_mode)
485  {
486  _Set_buffer_ptr(_Other._M_buffer_ptr);
487  }
488 
489  // Assignment operator
490  _Buffer_descriptor& operator=(const _Buffer_descriptor &_Other) __GPU
491  {
492  if (this != &_Other)
493  {
494  _M_data_ptr = _Other._M_data_ptr;
495  _M_curr_cpu_access_mode = _Other._M_curr_cpu_access_mode;
496  _M_type_access_mode = _Other._M_type_access_mode;
497  _Set_buffer_ptr(_Other._M_buffer_ptr);
498  }
499 
500  return *this;
501  }
502 
503  _Ret_ _Ubiquitous_buffer* _Get_buffer_ptr() const __CPU_ONLY
504  {
505  return _M_buffer_ptr;
506  }
507 
508  void _Set_buffer_ptr(_In_opt_ _Ubiquitous_buffer *_Buffer_ptr) __CPU_ONLY
509  {
510  if (_M_buffer_ptr != _Buffer_ptr)
511  {
512  if (_M_buffer_ptr != NULL) {
513  reinterpret_cast<_Reference_counter*>(_M_buffer_ptr)->_Remove_reference();
514  }
515 
516  _M_buffer_ptr = _Buffer_ptr;
517 
518  if (_M_buffer_ptr != NULL) {
519  reinterpret_cast<_Reference_counter*>(_M_buffer_ptr)->_Add_reference();
520  }
521  }
522  }
523 
524 #if !defined(_CXXAMP)
525  void _Set_buffer_ptr(_In_opt_ _Ubiquitous_buffer *) __GPU_ONLY
526  {
527  // No need to set the buffer ptr on the GPU
528  _M_buffer_ptr = NULL;
529  }
530 #endif // _CXXAMP
531 
532  bool _Is_array() const
533  {
534  return (_M_type_access_mode == _Is_array_mode);
535  }
536 
537  _Ret_ _View_key _Get_view_key()
538  {
539  return this;
540  }
541 
542  const _View_key _Get_view_key() const
543  {
544  return ((const _View_key)(this));
545  }
546 
547  _AMPIMP void _Get_CPU_access(_Access_mode _Requested_mode) const;
548 
549  } _Buffer_descriptor;
550 
551  // Caution: Do not change this structure definition.
552  // This struct is special and is processed by the FE to identify the textures
553  // used in a parallel_for_each and to setup the _M_data_ptr with the appropriate
554  // texture ptr value in the device code.
555  typedef struct _Texture_descriptor
556  {
557  // _M_data_ptr points to the raw data underlying the texture
558  mutable IUnknown *_M_data_ptr;
559 
560  private:
561  // _M_texture_ptr points to a _Texture that holds the data
562  // This is private to ensure that all assignments to this data member
563  // only happen through public functions which properly manage the
564  // ref count of the underlying texture
565  _Texture *_M_texture_ptr;
566 
567  // The index of the most detailed (largest in size) mipmap level for the texture (or texture view)
568  // This value is always zero for the texture and might be non-zero for the texture views
569  unsigned int _M_most_detailed_mipmap_level;
570 
571  // Number of accessible mipmap levels for the texture (or texture view),
572  // e.g. if the texture has 3 mipmap levels ([0, 1, 2]),
573  // then read-only texture view with most detailed mipmap level equal to 1, can have 1 or 2 mipmap levels ([1] or [1, 2]).
574  // Further texture_views created on top of the texture view defined above can only narrow down the range of accessible mipmap levels.
575  unsigned int _M_view_mipmap_levels;
576 
577  public:
578  // Public functions
579 
580  // Default constructor
581  _Texture_descriptor() __GPU
582  : _M_data_ptr(NULL), _M_texture_ptr(NULL), _M_most_detailed_mipmap_level(0), _M_view_mipmap_levels(0)
583  {
584  // Enables move constructor
585  }
586 
587  // Constructor for the texture
588  _Texture_descriptor(unsigned int _Most_detailed_mipmap_level, unsigned int _View_mipmap_levels) __GPU
589  : _M_data_ptr(NULL), _M_texture_ptr(NULL), _M_most_detailed_mipmap_level(_Most_detailed_mipmap_level), _M_view_mipmap_levels(_View_mipmap_levels)
590  {
591  }
592 
593  // Constructor for the interop texture
594  _Texture_descriptor(_In_ _Texture * _Texture_ptr) __CPU_ONLY
595  : _M_data_ptr(NULL), _M_texture_ptr(NULL), _M_most_detailed_mipmap_level(0)
596  {
597  _Set_texture_ptr(_Texture_ptr);
598 
599  // Adopt number of mipmap levels from underlying texture object
600  _M_view_mipmap_levels = _Get_mipmap_levels(_M_texture_ptr);
601  }
602 
603  // Destructor
604  ~_Texture_descriptor() __GPU
605  {
606  _Set_texture_ptr(NULL);
607  }
608 
609  // Copy constructor
610  _Texture_descriptor(const _Texture_descriptor &_Other) __GPU
611  : _M_data_ptr(_Other._M_data_ptr), _M_texture_ptr(NULL),
612  _M_most_detailed_mipmap_level(_Other._M_most_detailed_mipmap_level), _M_view_mipmap_levels(_Other._M_view_mipmap_levels)
613  {
614  _Set_texture_ptr(_Other._M_texture_ptr);
615  }
616 
617  // Copy constructor with ability to redefine mipmap information
618  _Texture_descriptor(const _Texture_descriptor &_Other, unsigned int _Most_detailed_mipmap_level, unsigned int _View_mipmap_levels) __GPU
619  : _M_data_ptr(_Other._M_data_ptr), _M_texture_ptr(NULL),
620  _M_most_detailed_mipmap_level(_Most_detailed_mipmap_level), _M_view_mipmap_levels(_View_mipmap_levels)
621  {
622  _Set_texture_ptr(_Other._M_texture_ptr);
623  }
624 
625  // Assignment operator
626  _Texture_descriptor& operator=(const _Texture_descriptor &_Other) __GPU
627  {
628  if (this != &_Other)
629  {
630  _M_data_ptr = _Other._M_data_ptr;
631  _Set_texture_ptr(_Other._M_texture_ptr);
632  _M_most_detailed_mipmap_level = _Other._M_most_detailed_mipmap_level;
633  _M_view_mipmap_levels = _Other._M_view_mipmap_levels;
634  }
635 
636  return *this;
637  }
638 
639  // Move constructor
640  _Texture_descriptor(_Texture_descriptor &&_Other) __CPU_ONLY
641  {
642  *this = std::move(_Other);
643  }
644 
645  bool operator==(const _Texture_descriptor &_Other) const __GPU
646  {
647  return _M_texture_ptr == _Other._M_texture_ptr
648  && _M_data_ptr == _Other._M_data_ptr
649  && _M_most_detailed_mipmap_level == _Other._M_most_detailed_mipmap_level
650  && _M_view_mipmap_levels == _Other._M_view_mipmap_levels;
651  }
652 
653  _Ret_ _Texture* _Get_texture_ptr() const __CPU_ONLY
654  {
655  _ASSERTE(_M_texture_ptr);
656  return _M_texture_ptr;
657  }
658 
659  unsigned int _Get_most_detailed_mipmap_level() const __GPU
660  {
661  return _M_most_detailed_mipmap_level;
662  }
663 
664  unsigned int _Get_view_mipmap_levels() const __GPU
665  {
666  return _M_view_mipmap_levels;
667  }
668 
669  void _Set_view_mipmap_levels(unsigned int _View_mipmap_levels) __CPU_ONLY
670  {
671  _M_view_mipmap_levels = _View_mipmap_levels;
672  }
673 
674  void _Set_texture_ptr(_In_opt_ _Texture *_Texture_ptr) __CPU_ONLY
675  {
676  if (_M_texture_ptr != _Texture_ptr)
677  {
678  if (_M_texture_ptr != NULL) {
679  reinterpret_cast<_Reference_counter*>(_M_texture_ptr)->_Remove_reference();
680  }
681 
682  _M_texture_ptr = _Texture_ptr;
683 
684  if (_M_texture_ptr != NULL) {
685  reinterpret_cast<_Reference_counter*>(_M_texture_ptr)->_Add_reference();
686  }
687  }
688  }
689 
690 #if !defined(_CXXAMP)
691  void _Set_texture_ptr(_In_opt_ _Texture *) __GPU_ONLY
692  {
693  // No need to set the texture ptr on the GPU
694  _M_texture_ptr = NULL;
695  }
696 #endif // _CXXAMP
697 
698  // This helper function is used to determine aliasing and copy violations
699  bool _Are_mipmap_levels_overlapping(const _Texture_descriptor *_Other) const __CPU_ONLY
700  {
701  _ASSERTE(_Other);
702 
703  if (this->_Get_texture_ptr() != _Other->_Get_texture_ptr())
704  {
705  return false;
706  }
707 
708  return !((_M_most_detailed_mipmap_level < _Other->_M_most_detailed_mipmap_level) ? ((_M_most_detailed_mipmap_level + _M_view_mipmap_levels - 1) < _Other->_M_most_detailed_mipmap_level)
709  : ((_Other->_M_most_detailed_mipmap_level + _Other->_M_view_mipmap_levels - 1) < _M_most_detailed_mipmap_level));
710  }
711 
712  } _Texture_descriptor;
713 
714  // Caution: Do not change this structure definition.
715  // This struct is special and is processed by the FE to identify the samplers
716  // used in a parallel_for_each.
717  typedef struct _Sampler_descriptor
718  {
719  // _M_data_ptr points to the sampler on accelerator
720  mutable void *_M_data_ptr;
721 
722  private:
723  // _M_sampler_ptr points to a _Sampler that holds the underlying sampler
724  // representation. This is private to ensure that all assignments to this data member
725  // only happen through public functions which properly manage the
726  // ref count of the underlying _Sampler object.
727  _Sampler *_M_sampler_ptr;
728 
729  public:
730  // Public functions
731 
732  // Default constructor
733  _Sampler_descriptor() __GPU
734  : _M_data_ptr(NULL), _M_sampler_ptr(NULL)
735  {
736  }
737 
738  _Sampler_descriptor(_In_ _Sampler * _Sampler_ptr) __GPU
739  : _M_data_ptr(NULL), _M_sampler_ptr(NULL)
740  {
741  _Set_sampler_ptr(_Sampler_ptr);
742  }
743 
744  // Destructor
745  ~_Sampler_descriptor() __GPU
746  {
747  _Set_sampler_ptr(NULL);
748  }
749 
750  // Copy constructor
751  _Sampler_descriptor(const _Sampler_descriptor &_Other) __GPU
752  : _M_data_ptr(_Other._M_data_ptr), _M_sampler_ptr(NULL)
753  {
754  _Set_sampler_ptr(_Other._M_sampler_ptr);
755  }
756 
757  // Assignment operator
758  _Sampler_descriptor& operator=(const _Sampler_descriptor &_Other) __GPU
759  {
760  if (this != &_Other)
761  {
762  _M_data_ptr = _Other._M_data_ptr;
763  _Set_sampler_ptr(_Other._M_sampler_ptr);
764  }
765 
766  return *this;
767  }
768 
769  // Move constructor
770  _Sampler_descriptor(_Sampler_descriptor &&_Other) __CPU_ONLY
771  {
772  *this = std::move(_Other);
773  }
774 
775  bool operator==(const _Sampler_descriptor &_Other) const __GPU
776  {
777  return _M_sampler_ptr == _Other._M_sampler_ptr && _M_data_ptr == _Other._M_data_ptr;
778  }
779 
780  _Ret_ _Sampler* _Get_sampler_ptr() const __CPU_ONLY
781  {
782  return _M_sampler_ptr;
783  }
784 
785  void _Set_sampler_ptr(_In_opt_ _Sampler *_Sampler_ptr) __CPU_ONLY
786  {
787  if (_M_sampler_ptr != _Sampler_ptr)
788  {
789  if (_M_sampler_ptr != NULL) {
790  reinterpret_cast<_Reference_counter*>(_M_sampler_ptr)->_Remove_reference();
791  }
792 
793  _M_sampler_ptr = _Sampler_ptr;
794 
795  if (_M_sampler_ptr != NULL) {
796  reinterpret_cast<_Reference_counter*>(_M_sampler_ptr)->_Add_reference();
797  }
798  }
799  }
800 
801 #if !defined(_CXXAMP)
802  void _Set_sampler_ptr(_In_opt_ _Sampler *) __GPU_ONLY
803  {
804  // No need to set the sampler ptr on the GPU
805  _M_sampler_ptr = NULL;
806  }
807 #endif // _CXXAMP
808 
809  } _Sampler_descriptor;
810 
811 } // namespace Concurrency::details
812 
813 // Forward declaration
814 class accelerator;
815 
816 namespace details
817 {
818  _AMPIMP size_t __cdecl _Get_num_devices();
819  _AMPIMP _Ret_ _Accelerator_impl_ptr * __cdecl _Get_devices();
820  _AMPIMP accelerator __cdecl _Select_default_accelerator();
821  _AMPIMP bool __cdecl _Set_default_accelerator(_Accelerator_impl_ptr _Accl);
822  _AMPIMP bool __cdecl _Is_D3D_accelerator_view(const accelerator_view& _Av);
823  _AMPIMP void __cdecl _Register_async_event(const _Event &_Ev, const std::shared_future<void> &_Shared_future);
824  _AMPIMP _Access_mode __cdecl _Get_recommended_buffer_host_access_mode(const accelerator_view &_Av);
825 }
826 
829  enum queuing_mode
830  {
831  queuing_mode_immediate,
832  queuing_mode_automatic
833  };
834 
835 namespace direct3d
836 {
846  _AMPIMP _Ret_ IUnknown * __cdecl get_device(const accelerator_view &_Av);
847 
861  _AMPIMP accelerator_view __cdecl create_accelerator_view(_In_ IUnknown *_D3D_device, queuing_mode _Qmode = queuing_mode_automatic);
862 
885  _AMPIMP accelerator_view __cdecl create_accelerator_view(accelerator& _Accelerator, bool _Disable_timeout, queuing_mode _Qmode = queuing_mode_automatic);
886 
898  _AMPIMP bool __cdecl is_timeout_disabled(const accelerator_view& _Accelerator_view);
899 
914  _AMPIMP void __cdecl d3d_access_lock(accelerator_view &_Av);
915 
925  _AMPIMP bool __cdecl d3d_access_try_lock(accelerator_view &_Av);
926 
934  _AMPIMP void __cdecl d3d_access_unlock(accelerator_view &_Av);
935 
940  struct adopt_d3d_access_lock_t {};
941 
945  class scoped_d3d_access_lock
946  {
947  public:
956  _AMPIMP explicit scoped_d3d_access_lock(accelerator_view &_Av);
957 
970  _AMPIMP explicit scoped_d3d_access_lock(accelerator_view &_Av, adopt_d3d_access_lock_t _T);
971 
975  _AMPIMP ~scoped_d3d_access_lock();
976 
984  _AMPIMP scoped_d3d_access_lock(scoped_d3d_access_lock &&_Other);
985 
997  _AMPIMP scoped_d3d_access_lock& operator=(scoped_d3d_access_lock &&_Other);
998 
999  private:
1000  // No copy constructor
1001  scoped_d3d_access_lock(const scoped_d3d_access_lock &_Other);
1002 
1003  // No assignment operator
1004  scoped_d3d_access_lock & operator=(const scoped_d3d_access_lock &_Other);
1005 
1006  _Accelerator_view_impl_ptr _M_impl;
1007  };
1008 } // namespace direct3d
1009 
1013 class accelerator
1014 {
1015  friend class accelerator_view;
1016 
1018 
1020 
1021  _AMPIMP friend accelerator_view __cdecl direct3d::create_accelerator_view(accelerator& _Accelerator, bool _Disable_timeout, queuing_mode _Qmode /* = queuing_mode_automatic */);
1022 
1023  friend _Ret_ details::_Accelerator_impl* details::_Get_accelerator_impl_ptr(const accelerator& _Accl);
1024 
1025 public:
1026 
1030  _AMPIMP static const wchar_t default_accelerator[];
1031 
1035  _AMPIMP static const wchar_t cpu_accelerator[];
1036 
1040  _AMPIMP static const wchar_t direct3d_warp[];
1041 
1045  _AMPIMP static const wchar_t direct3d_ref[];
1046 
1050  _AMPIMP accelerator();
1051 
1056  explicit accelerator(const std::wstring &_Device_path) : _M_impl(NULL)
1057  {
1058  _Init(_Device_path.c_str());
1059  }
1060 
1064  _AMPIMP ~accelerator();
1065 
1069  _AMPIMP accelerator(const accelerator &_Other);
1070 
1074  _AMPIMP accelerator &operator=(const accelerator &_Other);
1075 
1082  static inline std::vector<accelerator> get_all()
1083  {
1084  std::vector<accelerator> _AcceleratorVector;
1085  size_t _NumDevices = details::_Get_num_devices();
1086  for (size_t _I = 0; (_I < _NumDevices); ++_I)
1087  {
1088  _AcceleratorVector.push_back(details::_Get_devices()[_I]);
1089  }
1090 
1091  return _AcceleratorVector;
1092  }
1093 
1105  static inline bool set_default(const std::wstring& _Path)
1106  {
1107  accelerator _Accl(_Path);
1108  return details::_Set_default_accelerator(_Accl._M_impl);
1109  }
1110 
1119  _AMPIMP static accelerator_view __cdecl get_auto_selection_view();
1120 
1124  std::wstring get_device_path() const
1125  {
1126  return _Get_device_path();
1127  }
1128 
1129  __declspec(property(get=get_device_path)) std::wstring device_path;
1130 
1134  _AMPIMP unsigned int get_version() const;
1135  __declspec(property(get=get_version)) unsigned int version; // hiword=major, loword=minor
1136 
1140  std::wstring get_description() const
1141  {
1142  return _Get_description();
1143  }
1144 
1145  __declspec(property(get=get_description)) std::wstring description;
1146 
1151  _AMPIMP bool get_is_debug() const;
1152  __declspec(property(get=get_is_debug)) bool is_debug;
1153 
1158  _AMPIMP bool get_is_emulated() const;
1159  __declspec(property(get=get_is_emulated)) bool is_emulated;
1160 
1165  _AMPIMP bool get_has_display() const;
1166  __declspec(property(get=get_has_display)) bool has_display;
1167 
1174  _AMPIMP bool get_supports_double_precision() const;
1175  __declspec(property(get=get_supports_double_precision)) bool supports_double_precision;
1176 
1183  _AMPIMP bool get_supports_limited_double_precision() const;
1184  __declspec(property(get=get_supports_limited_double_precision)) bool supports_limited_double_precision;
1185 
1190  _AMPIMP bool get_supports_cpu_shared_memory() const;
1191  __declspec(property(get=get_supports_cpu_shared_memory)) bool supports_cpu_shared_memory;
1192 
1196  _AMPIMP accelerator_view get_default_view() const;
1197  __declspec(property(get=get_default_view)) accelerator_view default_view;
1198 
1202  _AMPIMP size_t get_dedicated_memory() const;
1203  __declspec(property(get=get_dedicated_memory)) size_t dedicated_memory;
1204 
1208  _AMPIMP access_type get_default_cpu_access_type() const;
1209  __declspec(property(get=get_default_cpu_access_type)) access_type default_cpu_access_type;
1210 
1228  _AMPIMP bool set_default_cpu_access_type(access_type _Default_cpu_access_type);
1229 
1235  _AMPIMP accelerator_view create_view(queuing_mode qmode = queuing_mode_automatic);
1236 
1240  _AMPIMP bool operator==(const accelerator &_Other) const;
1241 
1245  _AMPIMP bool operator!=(const accelerator &_Other) const;
1246 
1247 private:
1248 
1249  // Private constructor
1250  _AMPIMP accelerator(_Accelerator_impl_ptr _Impl);
1251 
1252  // Private helper methods
1253  _AMPIMP const wchar_t *_Get_device_path() const;
1254  _AMPIMP const wchar_t *_Get_description() const;
1255 
1256  _AMPIMP void _Init(const wchar_t *_Path);
1257 
1258 private:
1259 
1260  _Accelerator_impl_ptr _M_impl;
1261 };
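// Illustrative sketch (not part of the original header): enumerating the available
// accelerators with accelerator::get_all(), skipping emulated devices, and making the
// first suitable one the process-wide default via accelerator::set_default (which only
// takes effect if called before the default accelerator has been used).
// _Pick_default_accelerator is a hypothetical helper name.
#if 0
void _Pick_default_accelerator()
{
    std::vector<accelerator> _Accls = accelerator::get_all();
    for (size_t _I = 0; _I < _Accls.size(); ++_I)
    {
        if (!_Accls[_I].is_emulated)
        {
            accelerator::set_default(_Accls[_I].device_path);
            break;
        }
    }
}
#endif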
1262 
1266 class completion_future
1267 {
1269 public:
1270 
1274  completion_future()
1275  {
1276  }
1277 
1281  completion_future(const completion_future& _Other)
1282  : _M_shared_future(_Other._M_shared_future),
1283  _M_task(_Other._M_task)
1284  {
1285  }
1286 
1290  completion_future(completion_future&& _Other)
1291  : _M_shared_future(std::move(_Other._M_shared_future)),
1292  _M_task(std::move(_Other._M_task))
1293  {
1294  }
1295 
1299  ~completion_future()
1300  {
1301  }
1302 
1306  completion_future& operator=(const completion_future& _Other)
1307  {
1308  if (this != &_Other) {
1309  _M_shared_future = _Other._M_shared_future;
1310  _M_task = _Other._M_task;
1311  }
1312 
1313  return (*this);
1314  }
1315 
1319  completion_future& operator=(completion_future&& _Other)
1320  {
1321  if (this != &_Other) {
1322  _M_shared_future = std::move(_Other._M_shared_future);
1323  _M_task = std::move(_Other._M_task);
1324  }
1325 
1326  return (*this);
1327  }
1328 
1334  void get() const
1335  {
1336  _M_shared_future.get();
1337  }
1338 
1347  bool valid() const
1348  {
1349  return _M_shared_future.valid();
1350  }
1351 
1355  void wait() const
1356  {
1357  _M_shared_future.wait();
1358  }
1359 
1369  template <class _Rep, class _Period>
1370  std::future_status wait_for(const std::chrono::duration<_Rep, _Period>& _Rel_time) const
1371  {
1372  return _M_shared_future.wait_for(_Rel_time);
1373  }
1374 
1384  template <class _Clock, class _Duration>
1385  std::future_status wait_until(const std::chrono::time_point<_Clock, _Duration>& _Abs_time) const
1386  {
1387  return _M_shared_future.wait_until(_Abs_time);
1388  }
1389 
1398  operator std::shared_future<void>() const
1399  {
1400  return _M_shared_future;
1401  }
1402 
1407  template <typename _Functor>
1408  void then(const _Functor &_Func) const
1409  {
1410  this->to_task().then(_Func);
1411  }
1412 
1421  concurrency::task<void> to_task() const
1422  {
1423  return _M_task;
1424  }
1425 
1426 private:
1427 
1428  // Private constructor
1429  completion_future(const std::shared_future<void> &_Shared_future,
1430  const concurrency::task<void>& _Task)
1431  : _M_shared_future(_Shared_future), _M_task(_Task)
1432  {
1433  }
1434 
1435  std::shared_future<void> _M_shared_future;
1436  concurrency::task<void> _M_task;
1437 };
1438 
1442 class accelerator_view
1443 {
1444  friend class accelerator;
1445  friend class details::_Buffer;
1446  friend class details::_Texture;
1447  friend class details::_Sampler;
1450  friend class details::_D3D_accelerator_view_impl;
1451  friend class details::_CPU_accelerator_view_impl;
1453 
1454  _AMPIMP friend _Ret_ IUnknown * __cdecl direct3d::get_device(const accelerator_view &_Av);
1455 
1456  _AMPIMP friend accelerator_view __cdecl direct3d::create_accelerator_view(_In_ IUnknown *_D3D_device, queuing_mode qmode /* = queuing_mode_automatic */);
1457 
1458  _AMPIMP friend accelerator_view __cdecl direct3d::create_accelerator_view(accelerator& _Accelerator, bool _Disable_timeout, queuing_mode _Qmode /* = queuing_mode_automatic */);
1459 
1460  _AMPIMP friend bool __cdecl direct3d::is_timeout_disabled(const accelerator_view& _Accelerator_view);
1461 
1462  friend _Ret_ details::_Accelerator_view_impl* details::_Get_accelerator_view_impl_ptr(const accelerator_view& _Accl_view);
1463 
1464 public:
1465 
1469  _AMPIMP ~accelerator_view();
1470 
1474  _AMPIMP accelerator_view(const accelerator_view &_Other);
1475 
1479  _AMPIMP accelerator_view &operator=(const accelerator_view &_Other);
1480 
1484  _AMPIMP accelerator get_accelerator() const;
1485  __declspec(property(get=get_accelerator)) Concurrency::accelerator accelerator;
1486 
1491  _AMPIMP bool get_is_debug() const;
1492  __declspec(property(get=get_is_debug)) bool is_debug;
1493 
1497  _AMPIMP unsigned int get_version() const;
1498  __declspec(property(get=get_version)) unsigned int version; // hiword=major, loword=minor
1499 
1503  _AMPIMP queuing_mode get_queuing_mode() const;
1504  __declspec(property(get=get_queuing_mode)) Concurrency::queuing_mode queuing_mode;
1505 
1511  _AMPIMP bool get_is_auto_selection() const;
1512  __declspec(property(get=get_is_auto_selection)) bool is_auto_selection;
1513 
1517  _AMPIMP bool operator==(const accelerator_view &_Other) const;
1518 
1522  _AMPIMP bool operator!=(const accelerator_view &_Other) const;
1523 
1527  _AMPIMP void wait();
1528 
1533  _AMPIMP void flush();
1534 
1538  _AMPIMP concurrency::completion_future create_marker();
1539 
1540 private:
1541 
1542  // No default constructor
1543  accelerator_view();
1544 
1545  // Private constructor
1546  _AMPIMP accelerator_view(_Accelerator_view_impl_ptr _Impl, bool _Auto_selection = false);
1547 
1548 private:
1549 
1550  _Accelerator_view_impl_ptr _M_impl;
1551  bool _M_auto_selection;
1552 };
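// Illustrative sketch (not part of the original header): queueing a marker on an
// accelerator_view, blocking on the returned completion_future, then flushing and
// waiting until all commands submitted to the view have finished.
// _Drain is a hypothetical helper name.
#if 0
void _Drain(accelerator_view &_Av)
{
    completion_future _Marker = _Av.create_marker(); // completes once previously queued commands finish
    _Marker.wait();                                  // block on the marker
    _Av.flush();                                     // submit any buffered commands to the device
    _Av.wait();                                      // block until the view is idle
}
#endif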
1553 
1554 namespace details
1555 {
1556  inline _Ret_ _Accelerator_view_impl* _Get_accelerator_view_impl_ptr(const accelerator_view& _Accl_view)
1557  {
1558  return _Accl_view._M_impl;
1559  }
1560 
1561  inline _Ret_ _Accelerator_impl* _Get_accelerator_impl_ptr(const accelerator& _Accl)
1562  {
1563  return _Accl._M_impl;
1564  }
1565 
1566  // Type defining a hasher for accelerator_view objects
1567  // for use with std::unordered_set and std::unordered_map
1568  class _Accelerator_view_hasher
1569  {
1570  public:
1571  size_t operator()(const accelerator_view &_Accl_view) const
1572  {
1573  std::hash<_Accelerator_view_impl*> _HashFunctor;
1574  return _HashFunctor(_Accl_view._M_impl._Get_ptr());
1575  }
1576  };
1577 
1578  typedef std::unordered_set<accelerator_view, _Accelerator_view_hasher> _Accelerator_view_unordered_set;
1579 
1580  // Describes the N dimensional shape of a view in a buffer
1581  class _View_shape : public _Reference_counter
1582  {
1583  public:
1584 
1585  _AMPIMP static _Ret_ _View_shape* __cdecl _Create_view_shape(unsigned int _Rank, unsigned int _Linear_offset,
1586  const unsigned int *_Base_extent, const unsigned int *_View_offset,
1587  const unsigned int *_View_extent, const bool *_Projection_info = NULL);
1588 
1589  _AMPIMP _Ret_ _View_shape* _Get_reduced_shape_for_copy();
1590 
1591  inline unsigned int _Get_rank() const
1592  {
1593  return _M_rank;
1594  }
1595 
1596  inline unsigned int _Get_linear_offset() const
1597  {
1598  return _M_linear_offset;
1599  }
1600 
1601  inline const unsigned int *_Get_base_extent() const
1602  {
1603  return _M_base_extent;
1604  }
1605 
1606  inline const unsigned int *_Get_view_offset() const
1607  {
1608  return _M_view_offset;
1609  }
1610  inline const unsigned int *_Get_view_extent() const
1611  {
1612  return _M_view_extent;
1613  }
1614 
1615  inline const bool *_Get_projection_info() const
1616  {
1617  return _M_projection_info;
1618  }
1619 
1620  inline bool _Is_projection() const
1621  {
1622  return _M_projection_info[0];
1623  }
1624 
1625  inline bool _Is_valid(size_t _Buffer_size) const
1626  {
1627  // The end point of the base shape should not be greater than the size of the buffer
1628  size_t endLinearOffset = _M_linear_offset + _Get_extent_size(_M_rank, _M_base_extent);
1629  if (endLinearOffset > _Buffer_size) {
1630  return false;
1631  }
1632 
1633  return _Is_valid();
1634  }
1635 
1636  inline unsigned int _Get_view_size() const
1637  {
1638  return _Get_extent_size(_M_rank, _M_view_extent);
1639  }
1640 
1641  inline unsigned int _Get_view_linear_offset() const
1642  {
1643  return _Get_linear_offset(_M_view_offset);
1644  }
1645 
1646  static inline bool
1647  _Compare_extent_with_elem_size(unsigned int _Rank, const unsigned int *_Extent1, size_t _Elem_size1, const unsigned int *_Extent2, size_t _Elem_size2)
1648  {
1649  _ASSERTE((_Rank >= 1) && (_Extent1 != NULL)&& (_Extent2 != NULL));
1650 
1651  // The extents should match accounting for the element sizes of the respective buffers
1652  if ((_Extent1[_Rank - 1] * _Elem_size1) != (_Extent2[_Rank - 1] * _Elem_size2))
1653  {
1654  return false;
1655  }
1656 
1657  // Now compare the extent in all but the least significant dimension
1658  if ((_Rank > 1) && !_Compare_extent(_Rank - 1, _Extent1, _Extent2))
1659  {
1660  return false;
1661  }
1662 
1663  return true;
1664  }
1665 
1666 
1667  static inline bool
1668  _Compare_extent(unsigned int _Rank, const unsigned int *_Extent1, const unsigned int *_Extent2)
1669  {
1670  for (size_t _I = 0; _I < _Rank; ++_I) {
1671  if (_Extent1[_I] != _Extent2[_I]) {
1672  return false;
1673  }
1674  }
1675 
1676  return true;
1677  }
1678 
1679  inline bool _Is_view_linear(unsigned int &_Linear_offset, unsigned int &_Linear_size) const
1680  {
1681  // The effective rank for the purpose of determining linearity
1682  // depends on the highest dimension in which the extent is not 1
1683  unsigned int _First_dim_with_non_unit_extent = 0;
1684  while ((_First_dim_with_non_unit_extent < _M_rank) && (_M_view_extent[_First_dim_with_non_unit_extent] == 1)) {
1685  _First_dim_with_non_unit_extent++;
1686  }
1687 
1688  unsigned int _Effective_rank = (_M_rank - _First_dim_with_non_unit_extent);
1689 
1690  // It is linear if the effective rank is <= 1 or the base extent
1691  // and view extent are same in all but the highest dimension with
1692  // non-unit extent
1693  if ((_Effective_rank <= 1) ||
1694  (_Compare_extent(_Effective_rank - 1, &_M_base_extent[_First_dim_with_non_unit_extent + 1], &_M_view_extent[_First_dim_with_non_unit_extent + 1])))
1695  {
1696  _Linear_offset = _Get_view_linear_offset();
1697  _Linear_size = _Get_view_size();
1698  return true;
1699  }
1700 
1701  return false;
1702  }
1703 
1704  inline bool _Overlaps(const _View_shape* _Other) const
1705  {
1706  if (_Compare_base_shape(_Other))
1707  {
1708  // If the base shapes are identical we will do the N-dimensional
1709  // bounding box overlap test
1710 
1711  for (size_t _I = 0; _I < _M_rank; ++_I)
1712  {
1713  if (!_Intervals_overlap(_M_view_offset[_I], _M_view_offset[_I] + _M_view_extent[_I] - 1,
1714  _Other->_M_view_offset[_I], _Other->_M_view_offset[_I] + _Other->_M_view_extent[_I] - 1))
1715  {
1716  return false;
1717  }
1718  }
1719 
1720  return true;
1721  }
1722  else
1723  {
1724  // The base shapes are different. Check based on linear intervals
1725  size_t firstStart = _Get_view_linear_offset();
1726  size_t firstEnd = firstStart + _Get_view_size() - 1;
1727 
1728  size_t secondStart = _Other->_Get_view_linear_offset();
1729  size_t secondEnd = secondStart + _Other->_Get_view_size() - 1;
1730 
1731  return _Intervals_overlap(firstStart, firstEnd, secondStart, secondEnd);
1732  }
1733  }
1734 
1735  inline bool _Subsumes(const _View_shape* _Other) const
1736  {
1737  // Subsumption test can only be done for shapes that have the same base shape or
1738  // when both have a rank of 1
1739  if ((_M_rank == 1) && (_Other->_Get_rank() == 1))
1740  {
1741  size_t thisStart = _Get_view_linear_offset();
1742  size_t thisEnd = thisStart + _Get_view_size() - 1;
1743 
1744  size_t otherStart = _Other->_Get_view_linear_offset();
1745  size_t otherEnd = otherStart + _Other->_Get_view_size() - 1;
1746 
1747  return ((otherStart >= thisStart) && (otherEnd <= thisEnd));
1748  }
1749 
1750  if (!_Compare_base_shape(_Other)) {
1751  return false;
1752  }
1753 
1754  if (!_Contains(_Other->_Get_view_offset())) {
1755  return false;
1756  }
1757 
1758  std::vector<unsigned int> otherEndPointIndex(_M_rank);
1759  for (size_t _I = 0; _I < _M_rank; ++_I) {
1760  otherEndPointIndex[_I] = _Other->_Get_view_offset()[_I] + _Other->_Get_view_extent()[_I] - 1;
1761  }
1762 
1763  return _Contains(otherEndPointIndex.data());
1764  }
1765 
1766  private:
1767  // Private constructor to force construction through the _Create_view_shape method
1768  _View_shape(unsigned int _Rank, unsigned int _Linear_offset,
1769  const unsigned int *_Base_extent, const unsigned int *_View_offset,
1770  const unsigned int *_View_extent, const bool *_Projection_info);
1771 
1772  virtual ~_View_shape();
1773 
1774  // No default constructor or copy/assignment
1775  _View_shape();
1776  _View_shape(const _View_shape &_Other);
1777  _View_shape(_View_shape &&_Other);
1778  _View_shape& operator=(const _View_shape &_Other);
1779  _View_shape& operator=(_View_shape &&_Other);
1780 
1781  // Helper methods
1782  static bool _Intervals_overlap(size_t _First_start, size_t _First_end,
1783  size_t _Second_start, size_t _Second_end)
1784  {
1785  // Order the intervals by their start points
1786  if (_First_start > _Second_start) {
1787  size_t temp = _First_start;
1788  _First_start = _Second_start;
1789  _Second_start = temp;
1790 
1791  temp = _First_end;
1792  _First_end = _Second_end;
1793  _Second_end = temp;
1794  }
1795 
1796  // The start of the second one must be within the bounds of the first one
1797  return (_Second_start <= _First_end);
1798  }
1799 
1800  static unsigned int _Get_extent_size(unsigned int _Rank, const unsigned int *_Extent)
1801  {
1802  unsigned int totalExtent = 1;
1803  for (size_t _I = 0; _I < _Rank; ++_I) {
1804  totalExtent *= _Extent[_I];
1805  }
1806 
1807  return totalExtent;
1808  }
1809 
1810  inline bool _Is_valid() const
1811  {
1812  if (_M_rank == 0) {
1813  return false;
1814  }
1815 
1816  // Ensure the _M_view_offset + _M_view_extent is within the bounds of _M_base_extent
1817  size_t viewSize = 1;
1818 
1819  for (size_t _I = 0; _I < _M_rank; ++_I)
1820  {
1821  viewSize *= _M_view_extent[_I];
1822  if ((_M_view_offset[_I] + _M_view_extent[_I]) > _M_base_extent[_I]) {
1823  return false;
1824  }
1825  }
1826 
1827  if (viewSize == 0) {
1828  return false;
1829  }
1830 
1831  return true;
1832  }
1833 
1834  inline bool _Compare_base_shape(const _View_shape* _Other) const
1835  {
1836  return ((_M_rank == _Other->_M_rank) &&
1837  (_M_linear_offset == _Other->_M_linear_offset) &&
1838  _Compare_extent(_M_rank, _M_base_extent, _Other->_M_base_extent));
1839  }
1840 
1841  // Checks if the element at the specified index
1842  // is contained within this view shape
1843  // Assumes the rank of the index is same as the
1844  // rank of this view's shape
1845  inline bool _Contains(const unsigned int* _Element_index) const
1846  {
1847  for (size_t _I = 0; _I < _M_rank; ++_I)
1848  {
1849  if ((_Element_index[_I] < _M_view_offset[_I]) ||
1850  (_Element_index[_I] >= (_M_view_offset[_I] + _M_view_extent[_I])))
1851  {
1852  return false;
1853  }
1854  }
1855 
1856  return true;
1857  }
1858 
1859  inline unsigned int _Get_linear_offset(const unsigned int* _Element_index) const
1860  {
1861  unsigned int currMultiplier = 1;
1862  unsigned int linearOffset = _M_linear_offset;
1863  for (int _I = static_cast<int>(_M_rank - 1); _I >= 0; _I--)
1864  {
1865  linearOffset += (currMultiplier * _Element_index[_I]);
1866  currMultiplier *= _M_base_extent[_I];
1867  }
1868 
1869  return linearOffset;
1870  }
1871 
1872  private:
1873 
1874  unsigned int _M_rank;
1875  unsigned int _M_linear_offset;
1876  unsigned int *_M_base_extent;
1877  unsigned int *_M_view_offset;
1878  unsigned int *_M_view_extent;
1879  bool *_M_projection_info;
1880  };
1881 
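// Illustrative worked example (not part of the original header): two rank-2 views on
// the same 4 x 8 base extent. View A covers rows [0,1], columns [0,3]; view B covers
// rows [1,2], columns [2,5]. The bounding boxes intersect at row 1, columns [2,3], so
// _Overlaps returns true, while neither view _Subsumes the other.
// _Example_view_shapes is a hypothetical helper name.
#if 0
void _Example_view_shapes()
{
    unsigned int _Base[2]     = { 4, 8 };
    unsigned int _Offset_a[2] = { 0, 0 }, _Extent_a[2] = { 2, 4 };
    unsigned int _Offset_b[2] = { 1, 2 }, _Extent_b[2] = { 2, 4 };

    _View_shape *_A = _View_shape::_Create_view_shape(2, 0, _Base, _Offset_a, _Extent_a);
    _View_shape *_B = _View_shape::_Create_view_shape(2, 0, _Base, _Offset_b, _Extent_b);

    bool _Do_overlap = _A->_Overlaps(_B);  // true: row 1, columns 2..3 are shared
    bool _A_covers_B = _A->_Subsumes(_B);  // false: B extends past A in both dimensions
    (void)_Do_overlap; (void)_A_covers_B;
}
#endif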
1882  // This function creates a new _View_shape object from an existing _View_shape object when the data underlying the view
1883  // needs to be reinterpreted to use a different element size than the one used by the original view.
1884  inline
1885  _Ret_ _View_shape *_Create_reinterpreted_shape(const _View_shape* _Source_shape, size_t _Curr_elem_size, size_t _New_elem_size)
1886  {
1887  unsigned int _Rank = _Source_shape->_Get_rank();
1888  size_t _LinearOffsetInBytes = _Source_shape->_Get_linear_offset() * _Curr_elem_size;
1889  size_t _BaseLSDExtentInBytes = (_Source_shape->_Get_base_extent())[_Rank - 1] * _Curr_elem_size;
1890  size_t _ViewLSDOffsetInBytes = (_Source_shape->_Get_view_offset())[_Rank - 1] * _Curr_elem_size;
1891  size_t _ViewLSDExtentInBytes = (_Source_shape->_Get_view_extent())[_Rank - 1] * _Curr_elem_size;
1892 
1893  _ASSERTE((_LinearOffsetInBytes % _New_elem_size) == 0);
1894  _ASSERTE((_BaseLSDExtentInBytes % _New_elem_size) == 0);
1895  _ASSERTE((_ViewLSDOffsetInBytes % _New_elem_size) == 0);
1896  _ASSERTE((_ViewLSDExtentInBytes % _New_elem_size) == 0);
1897 
1898  size_t _Temp_val = _LinearOffsetInBytes / _New_elem_size;
1899  _ASSERTE(_Temp_val <= UINT_MAX);
1900  unsigned int _New_linear_offset = static_cast<unsigned int>(_Temp_val);
1901 
1902  std::vector<unsigned int> _New_base_extent(_Rank);
1903  std::vector<unsigned int> _New_view_offset(_Rank);
1904  std::vector<unsigned int> _New_view_extent(_Rank);
1905  for (unsigned int i = 0; i < _Rank - 1; ++i) {
1906  _New_base_extent[i] = (_Source_shape->_Get_base_extent())[i];
1907  _New_view_offset[i] = (_Source_shape->_Get_view_offset())[i];
1908  _New_view_extent[i] = (_Source_shape->_Get_view_extent())[i];
1909  }
1910 
1911  // The extent in the least significant dimension needs to be adjusted
1912  _Temp_val = _BaseLSDExtentInBytes / _New_elem_size;
1913  _ASSERTE(_Temp_val <= UINT_MAX);
1914  _New_base_extent[_Rank - 1] = static_cast<unsigned int>(_Temp_val);
1915 
1916  _Temp_val = _ViewLSDOffsetInBytes / _New_elem_size;
1917  _ASSERTE(_Temp_val <= UINT_MAX);
1918  _New_view_offset[_Rank - 1] = static_cast<unsigned int>(_Temp_val);
1919 
1920  _Temp_val = _ViewLSDExtentInBytes / _New_elem_size;
1921  _ASSERTE(_Temp_val <= UINT_MAX);
1922  _New_view_extent[_Rank - 1] = static_cast<unsigned int>(_Temp_val);
1923 
1924  return _View_shape::_Create_view_shape(_Rank, _New_linear_offset, _New_base_extent.data(), _New_view_offset.data(), _New_view_extent.data());
1925  }
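// Illustrative worked example (not part of the original header): reinterpreting a
// rank-1 view of 10 elements of a 4-byte type as a 1-byte type. The linear offset and
// the least-significant-dimension extents are scaled by the element-size ratio, so a
// view with linear offset 2 and extent 10 becomes linear offset 8 and extent 40.
// _Example_reinterpret is a hypothetical helper name.
#if 0
void _Example_reinterpret(const _View_shape *_Int_shape /* rank 1, offset 2, extent 10, 4-byte elements */)
{
    _View_shape *_Byte_shape = _Create_reinterpreted_shape(_Int_shape, /*_Curr_elem_size=*/4, /*_New_elem_size=*/1);
    // _Byte_shape->_Get_linear_offset() == 8 and (_Byte_shape->_Get_view_extent())[0] == 40
}
#endif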
1926 
1927  inline _Access_mode _Get_synchronize_access_mode(access_type cpu_access_type)
1928  {
1929  switch(cpu_access_type)
1930  {
1931  case access_type_auto:
1932  case access_type_read:
1933  return _Read_access;
1934  case access_type_write:
1935  return _Write_access;
1936  case access_type_read_write:
1937  return _Read_write_access;
1938  case access_type_none:
1939  default:
1940  _ASSERTE(false);
1941  return _No_access;
1942  }
1943  }
1944 
1945  inline access_type _Get_cpu_access_type(_Access_mode _Cpu_access_mode)
1946  {
1947  access_type _Cpu_access_type = access_type_none;
1948  if (_Cpu_access_mode & _Read_access) {
1949  _Cpu_access_type = static_cast<access_type>(_Cpu_access_type | access_type_read);
1950  }
1951 
1952  if (_Cpu_access_mode & _Write_access) {
1953  _Cpu_access_type = static_cast<access_type>(_Cpu_access_type | access_type_write);
1954  }
1955 
1956  return _Cpu_access_type;
1957  }
1958 
1959  // Class manages a raw buffer in an accelerator view
1960  class _Buffer : public _Reference_counter
1961  {
1962  friend class _CPU_accelerator_view_impl;
1963  friend class _D3D_accelerator_view_impl;
1964  friend class _D3D_temp_staging_cache;
1965 
1966  public:
1967 
1968  // Force construction through these static public method to ensure that _Buffer
1969  // objects are allocated in the runtime
1970 
1971  // Allocate a new buffer on the specified accelerator_view
1972  _AMPIMP static _Ret_ _Buffer * __cdecl _Create_buffer(accelerator_view _Accelerator_view, accelerator_view _Access_on_accelerator_view, size_t _Num_elems,
1973  size_t _Elem_size, bool _Is_temp = false, access_type _Cpu_access_type = access_type_auto);
1974 
1975  // Create a buffer object from a pre-allocated storage on the specified accelerator_view. This can be thought
1976  // of as the accelerator_view "adopting" the passed data buffer.
1977  _AMPIMP static _Ret_ _Buffer * __cdecl _Create_buffer(_In_ void *_Data_ptr, accelerator_view _Accelerator_view, size_t _Num_elems,
1978  size_t _Elem_size);
1979 
1980  // Create a staging buffer on the specified accelerator_view which can be accessed on the cpu upon mapping.
1981  _AMPIMP static _Ret_ _Buffer * __cdecl _Create_stage_buffer(accelerator_view _Accelerator_view, accelerator_view _Access_on_accelerator_view,
1982  size_t _Num_elems, size_t _Elem_size, bool _Is_temp = false);
1983 
1984  // Creates a temp staging buffer of the requested size. This function may create
1985  // a staging buffer smaller than the requested size.
1986  _AMPIMP static _Ret_ _Buffer * __cdecl _Get_temp_staging_buffer(accelerator_view _Av, size_t _Requested_num_elems, size_t _Elem_size);
1987 
1988  // Map a zero-copy or staging buffer for access on the CPU.
1989  _AMPIMP void _Map_buffer(_Access_mode _Map_type, bool _Wait);
1990 
1991  // Asynchronously map a zero-copy or staging buffer for access on the CPU.
1992  _AMPIMP _Event _Map_buffer_async(_Access_mode _Map_type);
1993 
1994  // Unmap a zero-copy or staging buffer denying CPU access
1995  _AMPIMP void _Unmap_buffer();
1996 
1997  // Copy data to _Dest asynchronously.
1998  _AMPIMP _Event _Copy_to_async(_Out_ _Buffer * _Dest, size_t _Num_elems, size_t _Src_offset = 0, size_t _Dest_offset = 0);
1999 
2000  // Copy data to _Dest asynchronously.
2001  _AMPIMP _Event _Copy_to_async(_Out_ _Buffer * _Dest, _View_shape_ptr _Src_shape, _View_shape_ptr _Dest_shape);
2002 
2003  _AMPIMP accelerator_view _Get_accelerator_view() const;
2004  _AMPIMP accelerator_view _Get_access_on_accelerator_view() const;
2005 
2006  _AMPIMP void _Register_view(_In_ _View_key _Key);
2007  _AMPIMP void _Unregister_view(_In_ _View_key _Key);
2008 
2009  // Return the raw data ptr - only an accelerator view implementation can interpret
2010  // this raw pointer. This method should usually not be used in the AMP header files
2011  // The _Get_host_ptr is the right way for accessing the host accessible ptr for a buffer
2012  _Ret_ void * _Get_data_ptr() const
2013  {
2014  return _M_data_ptr;
2015  }
2016 
2017  // Returns the host accessible ptr corresponding to the buffer. This would
2018  // return NULL when the buffer is inaccessible on the CPU
2019  _Ret_ void * _Get_host_ptr() const
2020  {
2021  return _M_host_ptr;
2022  }
2023 
2024  size_t _Get_elem_size() const
2025  {
2026  return _M_elem_size;
2027  }
2028 
2029  size_t _Get_num_elems() const
2030  {
2031  return _M_num_elems;
2032  }
2033 
2034  _Ret_ _Accelerator_view_impl* _Get_accelerator_view_impl() const
2035  {
2036  return _M_accelerator_view;
2037  }
2038 
2039  _Ret_ _Accelerator_view_impl* _Get_access_on_accelerator_view_impl() const
2040  {
2041  return _M_access_on_accelerator_view;
2042  }
2043 
2044  bool _Owns_data() const
2045  {
2046  return _M_owns_data;
2047  }
2048 
2049  _AMPIMP bool _Exclusively_owns_data();
2050 
2051  bool _Is_staging() const
2052  {
2053  return _M_is_staging;
2054  }
2055 
2056  _Access_mode _Get_allowed_host_access_mode() const
2057  {
2058  return _M_allowed_host_access_mode;
2059  }
2060 
2061  access_type _Get_allowed_host_access_type() const
2062  {
2063  return _Get_cpu_access_type(_M_allowed_host_access_mode);
2064  }
2065 
2066  bool _Is_host_accessible(_Access_mode _Requested_access_mode) const
2067  {
2068  return ((_Get_allowed_host_access_mode() & _Requested_access_mode) == _Requested_access_mode);
2069  }
2070 
2071  _Access_mode _Get_current_host_access_mode() const
2072  {
2073  return _M_current_host_access_mode;
2074  }
2075 
2076  bool _Is_temp() const
2077  {
2078  return _M_is_temp;
2079  }
2080 
2081  bool _Is_adopted() const
2082  {
2083  // Is it adopted from interop?
2084  return _M_is_adopted;
2085  }
2086 
2087  bool _Is_buffer() const
2088  {
2089  return _M_is_buffer;
2090  }
2091 
2092  _AMPIMP bool _Is_mappable() const;
2093 
2094  protected:
2095 
2096  // The _Buffer constructor is protected to force construction through the static
2097  // _Create_buffer method to ensure the object is allocated in the runtime
2098  _Buffer(_In_ _Accelerator_view_impl* _Av, _In_ void *_Buffer_data_ptr, _In_ void * _Host_ptr,
2099  _Access_mode _Allowed_host_access_mode, _Access_mode _Current_host_access_mode, size_t _Num_elems,
2100  size_t _Elem_size, bool _Owns_data, bool _Is_staging, bool _Is_temp, bool _Is_adopted);
2101 
2102  // protected destructor to force deletion through _Release
2103  virtual ~_Buffer();
2104 
2105  // No default constructor, copy constructor and assignment operator
2106  _Buffer();
2107  _Buffer(const _Buffer &rhs);
2108  _Buffer &operator=(const _Buffer &rhs);
2109 
2110  void _Set_host_ptr(_In_ void *_Host_ptr, _Access_mode _Host_access_mode = _No_access)
2111  {
2112  _ASSERTE((_Host_ptr == NULL) || (_Host_access_mode != _No_access));
2113 
2114  _M_host_ptr = _Host_ptr;
2115  if (_Host_ptr == NULL) {
2116  _M_current_host_access_mode = _No_access;
2117  }
2118  else {
2119  _M_current_host_access_mode = _Host_access_mode;
2120  }
2121  }
2122 
2123  void _Set_data_ptr(_In_ IUnknown *_Data_ptr)
2124  {
2125  _M_data_ptr = _Data_ptr;
2126  }
2127 
2128  protected:
2129  _Accelerator_view_impl_ptr _M_accelerator_view;
2130  _Accelerator_view_impl_ptr _M_access_on_accelerator_view;
2131  void * _M_data_ptr;
2132  void * _M_host_ptr;
2133  _Access_mode _M_allowed_host_access_mode;
2134  _Access_mode _M_current_host_access_mode;
2135  size_t _M_elem_size;
2136  size_t _M_num_elems;
2137  bool _M_owns_data;
2138  bool _M_is_staging;
2139 
2140  // Used to determine how to map the staging buffer after it's involved in a copy
2141  bool _M_is_temp;
2142 
2143  bool _M_is_adopted;
2144  bool _M_is_buffer;
2145  private:
2146  // A set of view_keys to invalidate whenever the host ptr of a staging buffer is invalidated
2147  std::unique_ptr<std::unordered_set<_View_key>> _M_view_keys;
2149  };
2150 
2151  // Class manages a texture in an accelerator view
2152  class _Texture : public _Buffer
2153  {
2154  friend class _CPU_accelerator_view_impl;
2155  friend class _D3D_accelerator_view_impl;
2156  friend class _D3D_temp_staging_cache;
2157 
2158  public:
2159 
2160  // Allocate a new texture on the specified accelerator_view
2161  _AMPIMP static _Ret_ _Texture * __cdecl _Create_texture(accelerator_view _Accelerator_view,
2162  unsigned int _Rank,
2163  size_t _Width, size_t _Height, size_t _Depth,
2164  unsigned int _Mip_levels,
2165  _Short_vector_base_type_id _Type_id,
2166  unsigned int _Num_channels,
2167  unsigned int _Bits_per_channel,
2168  bool _Is_temp = false);
2169 
2170  // Create a texture object from a pre-allocated storage on the specified accelerator_view. This can be thought
2171  // of as the accelerator_view "adopting" the passed data buffer.
2172  _AMPIMP static _Ret_ _Texture * __cdecl _Adopt_texture(unsigned int _Rank, _Texture_base_type_id _Id,
2173  _In_ IUnknown *_Data_ptr, accelerator_view _Accelerator_view,
2174  unsigned int _View_format);
2175 
2176  // Create a staging texture on the specified accelerator_view which can be accessed on the cpu upon mapping.
2177  _AMPIMP static _Ret_ _Texture * __cdecl _Create_stage_texture(accelerator_view _Accelerator_view, accelerator_view _Access_on_accelerator_view,
2178  unsigned int _Rank,
2179  size_t _Width, size_t _Height, size_t _Depth,
2180  unsigned int _Mip_levels,
2181  unsigned int _Format,
2182  bool _Is_temp = false);
2183 
2184  // Create a staging texture on the specified accelerator_view which can be accessed on the cpu upon mapping.
2185  _AMPIMP static _Ret_ _Texture * __cdecl _Create_stage_texture(accelerator_view _Accelerator_view, accelerator_view _Access_on_accelerator_view,
2186  unsigned int _Rank,
2187  size_t _Width, size_t _Height, size_t _Depth,
2188  unsigned int _Mip_levels,
2189  _Short_vector_base_type_id _Type_id,
2190  unsigned int _Num_channels,
2191  unsigned int _Bits_per_channel);
2192 
2193  // Creates a temp staging texture. This function may create
2194  // a staging texture smaller than the requested size.
2195  _AMPIMP static _Ret_ _Texture * __cdecl _Get_temp_staging_texture(accelerator_view _Accelerator_view,
2196  unsigned int _Rank,
2197  size_t _Width, size_t _Height, size_t _Depth,
2198  unsigned int _Mip_levels,
2199  unsigned int _Format);
2200 
2201  // Constructs a new texture with the same properties as the given texture.
2202  _AMPIMP static _Ret_ _Texture * __cdecl _Clone_texture(const _Texture *_Src, const accelerator_view &_Accelerator_view, const accelerator_view &_Associated_av);
2203 
2204  // Copy data to _Dest asynchronously for textures. The two textures must have been created with
2205  // compatible physical formats.
2206  _AMPIMP _Event _Copy_to_async(_Out_ _Texture * _Dest, const size_t *_Copy_extent,
2207  const size_t *_Src_offset, const size_t *_Dst_offset,
2208  unsigned int _Src_mipmap_level, unsigned int _Dst_mipmap_level);
2209 
2210  size_t _Get_width(unsigned int _Mip_offset = 0) const
2211  {
2212  return (_M_width >> _Mip_offset) ? (_M_width >> _Mip_offset) : 1U;
2213  }
2214 
2215  size_t _Get_height(unsigned int _Mip_offset = 0) const
2216  {
2217  return (_M_height >> _Mip_offset) ? (_M_height >> _Mip_offset) : 1U;
2218  }
2219 
2220  size_t _Get_depth(unsigned int _Mip_offset = 0) const
2221  {
2222  return (_M_depth >> _Mip_offset) ? (_M_depth >> _Mip_offset) : 1U;
2223  }
2224 
2225  unsigned int _Get_rank() const
2226  {
2227  return _M_rank;
2228  }
2229 
2230  unsigned int _Get_texture_format() const
2231  {
2232  return _M_texture_format;
2233  }
2234 
2235  unsigned int _Get_view_format() const
2236  {
2237  return _M_view_format;
2238  }
2239 
2240  unsigned int _Get_num_channels() const
2241  {
2242  return _M_num_channels;
2243  }
2244 
2245  unsigned int _Get_bits_per_channel() const
2246  {
2247  // For texture adopted from interop, return 0.
2248  return _Is_adopted() ? 0 : _M_bits_per_channel;
2249  }
2250 
2251  unsigned int _Get_bits_per_element() const
2252  {
2253  return _M_bits_per_channel * _M_num_channels;
2254  }
2255 
2256  unsigned int _Get_data_length(unsigned int _Most_detailed_mipmap_level, unsigned int _View_mipmap_levels, const size_t *_Extents = nullptr) const // in bytes
2257  {
2258  _ASSERTE(_View_mipmap_levels);
2259 
2260  unsigned long long _Bits_per_byte = 8ULL;
2261  unsigned long long _Total_bytes = 0ULL;
2262 
2263  unsigned int _Mip_level = _Most_detailed_mipmap_level;
2264 
2265  // Sum up data length (in bytes) of all mipmap levels in the view
2266  for (unsigned int _Mip_offset=0; _Mip_offset < _View_mipmap_levels; ++_Mip_offset)
2267  {
2268  unsigned long long _Width = 1ULL;
2269  unsigned long long _Height = 1ULL;
2270  unsigned long long _Depth = 1ULL;
2271 
2272  if (_Extents)
2273  {
2274  switch (_M_rank)
2275  {
2276  case 3:
2277  _Depth = (_Extents[2] >> _Mip_level) ? (_Extents[2] >> _Mip_level) : 1U;
2278  // deliberately fall thru
2279  case 2:
2280  _Height = (_Extents[1] >> _Mip_level) ? (_Extents[1] >> _Mip_level) : 1U;
2281  // deliberately fall thru
2282  case 1:
2283  _Width = (_Extents[0] >> _Mip_level) ? (_Extents[0] >> _Mip_level) : 1U;
2284  break;
2285  default:
2286  _ASSERTE(false); // textures are only rank 1-3
2287  }
2288  }
2289  else
2290  {
2291  _Width = _Get_width(_Mip_level);
2292  _Height = _Get_height(_Mip_level);
2293  _Depth = _Get_depth(_Mip_level);
2294  }
2295 
2296  // Note _Get_bits_per_element() can be smaller than 8
2297  // Use unsigned long long to avoid integer overflow
2298  _Total_bytes += ((_Width * _Height * _Depth * static_cast<unsigned long long>(_Get_bits_per_element())) + _Bits_per_byte - 1) / _Bits_per_byte;
2299 
2300  _Mip_level++;
2301  }
2302 
2303  return static_cast<unsigned int>(_Total_bytes);
2304  }
2305 
2306  unsigned int _Get_mip_levels() const
2307  {
2308  return _M_mip_levels;
2309  }
2310 
2311  size_t _Get_row_pitch() const
2312  {
2313  return _M_row_pitch;
2314  }
2315 
2316  void _Set_row_pitch(size_t _Val)
2317  {
2318  _M_row_pitch = _Val;
2319  }
2320 
2321  size_t _Get_depth_pitch() const
2322  {
2323  return _M_depth_pitch;
2324  }
2325 
2326  void _Set_depth_pitch(size_t _Val)
2327  {
2328  _M_depth_pitch = _Val;
2329  }
2330 
2331  private:
2332 
2333  // The _Texture constructor is private to force construction through the static
2334  // _Create_texture method to ensure the object is allocated in the runtime
2335  _Texture(_In_ _Accelerator_view_impl* _Av, _In_ void *_Texture_data_ptr, _In_ void * _Host_ptr,
2336  _Access_mode _Allowed_host_access_mode, _Access_mode _Current_host_access_mode,
2337  unsigned int _Rank,
2338  size_t _Width, size_t _Height, size_t _Depth,
2339  unsigned int _Mip_levels,
2340  unsigned int _Texture_format,
2341  unsigned int _View_format,
2342  unsigned int _Num_channels,
2343  unsigned int _Bits_per_channel,
2344  bool _Owns_data, bool _Is_staging, bool _Is_temp, bool _Is_adopted);
2345 
2346  // Private destructor to force deletion through _Release
2347  ~_Texture();
2348 
2349  // No default constructor, copy constructor and assignment operator
2350  _Texture();
2351  _Texture(const _Texture &rhs);
2352  _Texture &operator=(const _Texture &rhs);
2353 
2354  // Texture only
2355  unsigned int _M_rank;
2356  size_t _M_width;
2357  size_t _M_height;
2358  size_t _M_depth;
2359  unsigned int _M_texture_format;
2360  unsigned int _M_view_format;
2361  unsigned int _M_bits_per_channel;
2362  unsigned int _M_num_channels;
2363  unsigned int _M_mip_levels;
2364 
2365  size_t _M_row_pitch;
2366  size_t _M_depth_pitch;
2367  };
2368 
2369  class _Sampler : public _Reference_counter
2370  {
2371  public:
2372  // Create a new sampler with configurations exposed by C++ AMP.
2373  _AMPIMP static _Ret_ _Sampler * __cdecl _Create(
2374  unsigned int _Filter_mode,
2375  unsigned int _Address_mode,
2376  float _Border_r,
2377  float _Border_g,
2378  float _Border_b,
2379  float _Border_a);
2380 
2381  // Create a sampler object given an adopted opaque data pointer
2382  _AMPIMP static _Ret_ _Sampler * __cdecl _Create(_In_ void *_Data_ptr);
2383 
2384  // Return the raw data ptr - only an accelerator view implementation can interpret
2385  // this raw pointer. This method should usually not be used in the AMP header files
2386  _Ret_ void * _Get_data_ptr() const
2387  {
2388  return _M_data_ptr;
2389  }
2390 
2391  bool _Is_adopted() const
2392  {
2393  // Is it adopted from interop?
2394  return _M_is_adopted;
2395  }
2396 
2397  unsigned int _Get_filter_mode() const
2398  {
2399  return _M_filter_mode;
2400  }
2401 
2402  unsigned int _Get_address_mode() const
2403  {
2404  return _M_address_mode;
2405  }
2406 
2407  const float* _Get_border_color() const
2408  {
2409  return &_M_border_color[0];
2410  }
2411 
2412  private:
2413  // The _Sampler constructor is private to force construction through the static
2414  // _Create method to ensure the object is allocated in the runtime
2415  _Sampler(unsigned int _Filter_mode, unsigned int _Address_mode, float _Border_r, float _Border_g, float _Border_b, float _Border_a);
2416 
2417  _Sampler(_In_ void *_Data_ptr);
2418 
2419  // Private destructor to force deletion through _Release
2420  ~_Sampler();
2421 
2422  // No default constructor, copy constructor and assignment operator
2423  _Sampler();
2424  _Sampler(const _Sampler &rhs);
2425  _Sampler &operator=(const _Sampler &rhs);
2426 
2427  void * _M_data_ptr;
2428  bool _M_is_adopted;
2429  unsigned int _M_filter_mode;
2430  unsigned int _M_address_mode;
2431  float _M_border_color[4];
2432  };
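  // [Editorial sketch, not part of the original header] Samplers are created through the
  // static _Create factory above rather than the private constructors. The helper name and
  // the numeric filter/address mode values below are illustrative placeholders; their meaning
  // is defined by the accelerator_view implementation that consumes _Get_data_ptr().
  inline _Ret_ _Sampler * _Example_create_default_sampler()
  {
      return _Sampler::_Create(/*_Filter_mode=*/0, /*_Address_mode=*/0,
                               /*_Border_r=*/0.0f, /*_Border_g=*/0.0f,
                               /*_Border_b=*/0.0f, /*_Border_a=*/1.0f);
  }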
2433 
2434  // Forward declaration for copy helper functions
2435  _AMPIMP _Event __cdecl _Copy_impl(_In_ _Buffer *_Src, size_t _Src_offset,
2436  _Out_ _Buffer * _Dst, size_t _Dest_offset,
2437  size_t _Num_elems, size_t _Preferred_copy_chunk_num_elems = 0);
2438 
2439  _AMPIMP _Event __cdecl _Copy_async_impl(_In_ _Texture *_Src_tex, const size_t *_Src_offset, unsigned int _Src_mipmap_level,
2440  _Out_ _Texture *_Dst_tex, const size_t *_Dst_offset, unsigned int _Dst_mipmap_level,
2441  const size_t *_Copy_extent, const size_t *_Preferred_copy_chunk_extent = NULL);
2442 
2443  inline bool _Get_chunked_staging_texture(_In_ _Texture* _Tex, const size_t *_Copy_chunk_extent, _Inout_ size_t *_Remaining_copy_extent, _Out_ size_t *_Curr_copy_extent, _Out_ _Texture_ptr *_Staging_texture)
2444  {
2445  bool _Truncated_copy = false;
2446  size_t _Allocation_extent[3] = { _Copy_chunk_extent[0], _Copy_chunk_extent[1], _Copy_chunk_extent[2] };
2447 
2448  unsigned int _Most_sig_idx = _Tex->_Get_rank() - 1;
2449 
2450  if (_Allocation_extent[_Most_sig_idx] > _Remaining_copy_extent[_Most_sig_idx]) {
2451  _Allocation_extent[_Most_sig_idx] = _Remaining_copy_extent[_Most_sig_idx];
2452  }
2453 
2454  _Texture_ptr _Stage = _Texture::_Get_temp_staging_texture(_Tex->_Get_accelerator_view(), _Tex->_Get_rank(),
2455  _Allocation_extent[0], _Allocation_extent[1], _Allocation_extent[2],
2456  /*_Mip_levels=*/1, _Tex->_Get_texture_format());
2457 
2458  std::copy(&_Allocation_extent[0], &_Allocation_extent[3], stdext::make_unchecked_array_iterator(&_Curr_copy_extent[0]));
2459  size_t _Staging_tex_extent[3] = {_Stage->_Get_width(), _Stage->_Get_height(), _Stage->_Get_depth()};
2460  if (_Curr_copy_extent[_Most_sig_idx] > _Staging_tex_extent[_Most_sig_idx]) {
2461  _Curr_copy_extent[_Most_sig_idx] = _Staging_tex_extent[_Most_sig_idx];
2462  }
2463 
2464  // The truncation, however, can happen only in the most significant dimension; lower
2465  // dimensions should not get truncated
2466  if (_Curr_copy_extent[_Most_sig_idx] < _Remaining_copy_extent[_Most_sig_idx])
2467  {
2468  _Remaining_copy_extent[_Most_sig_idx] -= _Curr_copy_extent[_Most_sig_idx];
2469  _Truncated_copy = true;
2470  }
2471 
2472  for (unsigned int _I = 0; _I < _Most_sig_idx; _I++)
2473  {
2474  _ASSERTE(_Curr_copy_extent[_I] == _Remaining_copy_extent[_I]);
2475  }
2476 
2477  *_Staging_texture = _Stage;
2478  return _Truncated_copy;
2479  }
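  // [Editorial sketch, not part of the original header] The helper above may hand back a
  // staging texture smaller than the requested copy; the assumed caller pattern is to loop
  // until no truncation is reported, copying one chunk per iteration. The function name and
  // the elided per-chunk copy are illustrative only.
  inline void _Example_chunked_texture_copy(_In_ _Texture *_Tex, const size_t *_Chunk_extent,
                                            size_t (&_Remaining)[3])
  {
      size_t _Curr[3];
      _Texture_ptr _Staging;
      bool _Truncated;
      do {
          _Truncated = _Get_chunked_staging_texture(_Tex, _Chunk_extent, _Remaining, _Curr, &_Staging);
          // ... copy a _Curr-sized chunk through _Staging and advance the copy offsets here ...
      } while (_Truncated);
  }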
2480 
2481  #pragma warning ( push )
2482  #pragma warning ( disable : 6101 )
2483  // Suppress "warning C6101: Returning uninitialized memory '*_Dst'.: A successful"
2484  // "path through the function does not set the named _Out_ parameter."
2485  // The callers to _Copy_data_on_host all have static_assert that _Rank has to be 1, 2, or 3 dimensions for texture
2486  //
2487  template <typename _Input_iterator, typename _Value_type>
2488  inline void _Copy_data_on_host_src_iter
2489  (int _Rank, _Input_iterator _Src, _Out_ _Value_type *_Dst,
2490  size_t _Width, size_t _Height, size_t _Depth,
2491  size_t _Dst_row_pitch_in_bytes, size_t _Dst_depth_pitch_in_bytes,
2492  size_t _Src_row_pitch, size_t _Src_depth_pitch)
2493  {
2494  switch(_Rank)
2495  {
2496  case 1:
2497  {
2498  _Input_iterator _End = _Src;
2499  std::advance(_End, _Width);
2500  std::copy(_Src, _End, stdext::make_unchecked_array_iterator(_Dst));
2501  }
2502  break;
2503  case 2:
2504  {
2505  unsigned char *_Dst_ptr = reinterpret_cast<unsigned char *>(_Dst);
2506  _Input_iterator _Src_start = _Src;
2507  for (size_t _I = 0; _I < _Height; _I++)
2508  {
2509  _Input_iterator _Src_end = _Src_start;
2510  std::advance(_Src_end, _Width);
2511 
2512  std::copy(_Src_start, _Src_end, stdext::make_unchecked_array_iterator(reinterpret_cast<_Value_type*>(_Dst_ptr)));
2513 
2514  _Dst_ptr += _Dst_row_pitch_in_bytes;
2515  std::advance(_Src_start, _Src_row_pitch);
2516  }
2517  }
2518  break;
2519  case 3:
2520  {
2521  unsigned char *_Dst_ptr_slice_start = reinterpret_cast<unsigned char *>(_Dst);
2522  _Input_iterator _Src_depth_slice_start = _Src;
2523  for (size_t _I = 0; _I < _Depth; _I++)
2524  {
2525  _Input_iterator _Src_start = _Src_depth_slice_start;
2526  unsigned char *_Dst_ptr = _Dst_ptr_slice_start;
2527 
2528  for (size_t _J = 0; _J < _Height; _J++)
2529  {
2530  _Input_iterator _Src_end = _Src_start;
2531  std::advance(_Src_end, _Width);
2532 
2533  std::copy(_Src_start, _Src_end, stdext::make_unchecked_array_iterator(reinterpret_cast<_Value_type*>(_Dst_ptr)));
2534 
2535  _Dst_ptr += _Dst_row_pitch_in_bytes;
2536  std::advance(_Src_start, _Src_row_pitch);
2537  }
2538 
2539  _Dst_ptr_slice_start += _Dst_depth_pitch_in_bytes;
2540  std::advance(_Src_depth_slice_start, _Src_depth_pitch);
2541  }
2542  }
2543  break;
2544  default:
2545  _ASSERTE(FALSE);
2546  break;
2547  }
2548  }
2549  #pragma warning ( pop ) // disable : 6101
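  // [Editorial note, illustrative values not from the original header] Worked example of the
  // pitch arithmetic above for a rank-2 copy: each row copies _Width elements, then the
  // destination pointer advances by _Dst_row_pitch_in_bytes while the source iterator advances
  // by _Src_row_pitch elements. With _Width = 640 and sizeof(_Value_type) = 4, a padded
  // destination row pitch of 2592 bytes means 2560 bytes of payload are written per row and
  // 32 bytes of padding are skipped.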
2550 
2551  template <typename _Output_iterator, typename _Value_type>
2552  inline void _Copy_data_on_host_dst_iter
2553  (int _Rank, const _Value_type * _Src, _Output_iterator _Dst,
2554  size_t _Width, size_t _Height, size_t _Depth,
2555  size_t _Src_row_pitch_in_bytes, size_t _Src_depth_pitch_in_bytes,
2556  size_t _Dst_row_pitch, size_t _Dst_depth_pitch)
2557  {
2558  switch(_Rank)
2559  {
2560  case 1:
2561  {
2562  const _Value_type * _End = _Src + _Width;
2563  std::copy(stdext::make_unchecked_array_iterator(_Src), stdext::make_unchecked_array_iterator(_End), _Dst);
2564  }
2565  break;
2566  case 2:
2567  {
2568  const unsigned char *_Src_ptr = reinterpret_cast<const unsigned char *>(_Src);
2569  _Output_iterator _Dst_iter = _Dst;
2570  for (size_t _I = 0; _I < _Height; _I++)
2571  {
2572  const _Value_type * _Src_end = reinterpret_cast<const _Value_type*>(_Src_ptr) + _Width;
2573 
2574  std::copy(stdext::make_unchecked_array_iterator(reinterpret_cast<const _Value_type*>(_Src_ptr)), stdext::make_unchecked_array_iterator(_Src_end), _Dst_iter);
2575  std::advance(_Dst_iter, _Dst_row_pitch);
2576  _Src_ptr += _Src_row_pitch_in_bytes;
2577  }
2578  }
2579  break;
2580  case 3:
2581  {
2582  const unsigned char *_Src_ptr_slice_start = reinterpret_cast<const unsigned char *>(_Src);
2583  _Output_iterator _Dst_depth_slice_start = _Dst;
2584  for (size_t _I = 0; _I < _Depth; _I++)
2585  {
2586  _Output_iterator _Dst_iter = _Dst_depth_slice_start;
2587  const unsigned char *_Src_ptr = _Src_ptr_slice_start;
2588 
2589  for (size_t _J = 0; _J < _Height; _J++)
2590  {
2591  const _Value_type * _Src_end = reinterpret_cast<const _Value_type *>(_Src_ptr) + _Width;
2592 
2593  std::copy(stdext::make_unchecked_array_iterator(reinterpret_cast<const _Value_type*>(_Src_ptr)), stdext::make_unchecked_array_iterator(_Src_end), _Dst_iter);
2594 
2595  std::advance(_Dst_iter, _Dst_row_pitch);
2596  _Src_ptr += _Src_row_pitch_in_bytes;
2597  }
2598 
2599  _Src_ptr_slice_start += _Src_depth_pitch_in_bytes;
2600  std::advance(_Dst_depth_slice_start, _Dst_depth_pitch);
2601  }
2602  }
2603  break;
2604  default:
2605  _ASSERTE(FALSE);
2606  break;
2607  }
2608  }
2609 
2610  _AMPIMP size_t __cdecl _Get_preferred_copy_chunk_size(size_t _Total_copy_size_in_bytes);
2611 
2612  inline size_t _Get_preferred_copy_chunk_num_elems(size_t _Total_num_elems, size_t _Elem_size)
2613  {
2614  size_t preferredChunkSize = _Get_preferred_copy_chunk_size(_Total_num_elems * _Elem_size);
2615 
2616  return (preferredChunkSize / _Elem_size);
2617  }
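  // [Editorial note, illustrative values not from the original header] Worked example for the
  // helper above: if _Get_preferred_copy_chunk_size returns 4 MB (4194304 bytes) for the
  // requested copy and _Elem_size is 16 bytes, the preferred chunk is 4194304 / 16 = 262144
  // elements.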
2618 
2619  inline void _Get_preferred_copy_chunk_extent(unsigned int _Rank, size_t _Width, size_t _Height,
2620  size_t _Depth, size_t _Bits_per_element, _Out_writes_(3) size_t *_Preferred_copy_chunk_extent)
2621  {
2622  _ASSERTE(_Preferred_copy_chunk_extent != nullptr);
2623 
2624  size_t requestedByteSize = static_cast<size_t>((static_cast<unsigned long long>(_Width) *
2625  static_cast<unsigned long long>(_Height) *
2626  static_cast<unsigned long long>(_Depth) *
2627  static_cast<unsigned long long>(_Bits_per_element)) >> 3);
2628 
2629  size_t preferredChunkSize = _Get_preferred_copy_chunk_size(requestedByteSize);
2630 
2631  // Let's align the allocation size to the element size of the texture
2632  size_t preferredCopyChunkNumElems = static_cast<size_t>((static_cast<unsigned long long>(preferredChunkSize) * 8U) / _Bits_per_element);
2633 
2634  // Let's truncate the dimensions of the requested staging texture.
2635  // We only truncate in the most significant dimension
2636  switch (_Rank)
2637  {
2638  case 1:
2639  _Width = preferredCopyChunkNumElems;
2640  break;
2641  case 2:
2642  _Height = (preferredCopyChunkNumElems + _Width - 1) / _Width;
2643  break;
2644  case 3:
2645  _Depth = (preferredCopyChunkNumElems + (_Height * _Width) - 1) / (_Height * _Width);
2646  break;
2647  default:
2648  _ASSERTE(false);
2649  }
2650 
2651  _Preferred_copy_chunk_extent[0] = _Width;
2652  _Preferred_copy_chunk_extent[1] = _Height;
2653  _Preferred_copy_chunk_extent[2] = _Depth;
2654  }
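  // [Editorial note, illustrative values not from the original header] Worked example for the
  // helper above: a rank-3 request of 256 x 256 x 256 texels at 32 bits per element asks for
  // 64 MB. If the preferred chunk size comes back as 16 MB, preferredCopyChunkNumElems is
  // 4194304 and only the most significant dimension is truncated:
  // _Depth = (4194304 + 256*256 - 1) / (256*256) = 64, giving a 256 x 256 x 64 staging extent.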
2655 
2656  // Finds the greatest common divisor of 2 unsigned integral numbers using Euclid's algorithm
2657  template <typename _T>
2658  inline _T _Greatest_common_divisor(_T _M, _T _N)
2659  {
2660  static_assert(std::is_unsigned<_T>::value, "This GCD function only supports unsigned integral types");
2661 
2662  _ASSERTE((_M > 0) && (_N > 0));
2663 
2664  if (_N > _M) {
2665  std::swap(_N , _M);
2666  }
2667 
2668  _T _Temp;
2669  while (_N > 0)
2670  {
2671  _Temp = _N;
2672  _N = _M % _N;
2673  _M = _Temp;
2674  }
2675 
2676  return _M;
2677  }
2678 
2679  // Finds the least common multiple of 2 unsigned integral numbers using their greatest_common_divisor
2680  template <typename _T>
2681  inline _T _Least_common_multiple(_T _M, _T _N)
2682  {
2683  static_assert(std::is_unsigned<_T>::value, "This LCM function only supports unsigned integral types");
2684 
2685  _ASSERTE((_M > 0) && (_N > 0));
2686 
2687  _T _Gcd = _Greatest_common_divisor(_M, _N);
2688  return ((_M / _Gcd) * _N);
2689  }
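  // [Editorial sketch, not part of the original header] Quick sanity checks for the two
  // helpers above; the function name is illustrative. The copy helpers below use the LCM of
  // the source and destination element sizes so that each copy chunk is a whole number of
  // elements on both sides of the copy.
  inline void _Example_gcd_lcm_checks()
  {
      _ASSERTE(_Greatest_common_divisor(12u, 18u) == 6u);
      _ASSERTE(_Least_common_multiple(4u, 6u) == 12u);
  }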
2690 
2691  template <typename InputIterator, typename _Value_type>
2692  inline _Event _Copy_impl(InputIterator _SrcFirst, InputIterator _SrcLast, size_t _NumElemsToCopy,
2693  _Out_ _Buffer * _Dst, size_t _Dest_offset, size_t _Preferred_copy_chunk_num_elems = 0)
2694  {
2695  if (_NumElemsToCopy == 0) {
2696  return _Event();
2697  }
2698 
2699  if (_Dst == NULL) {
2700  throw runtime_exception("Failed to copy to buffer.", E_INVALIDARG);
2701  }
2702 
2703 #pragma warning ( push )
2704 #pragma warning ( disable : 6001 ) // Using uninitialized memory '*_Dst'
2705  if (((_NumElemsToCopy * sizeof(_Value_type)) + (_Dest_offset * _Dst->_Get_elem_size())) > (_Dst->_Get_num_elems() * _Dst->_Get_elem_size()))
2706  {
2707  throw runtime_exception("Invalid _Src argument(s). _Src size exceeds total size of the _Dest.", E_INVALIDARG);
2708  }
2709 #pragma warning ( pop )
2710 
2711  _ASSERTE(_NumElemsToCopy == (size_t)(std::distance(_SrcFirst, _SrcLast)));
2712 
2713  // If the dest is host accessible for write then we do the copy on
2714  // accelerator(accelerator::cpu_accelerator).default_view
2715  if (_Dst->_Is_host_accessible(_Write_access))
2716  {
2717  // Let's first map the _Dst buffer
2718  _Event _Ev = _Dst->_Map_buffer_async(_Write_access);
2719 
2720  // The _Dest is accessible on host. We just need to do a std::copy using a raw pointer as OutputIterator
2721  _Buffer_ptr _PDestBuf = _Dst;
2722  _Ev = _Ev._Add_continuation(std::function<_Event()>([_PDestBuf,_Dest_offset, _SrcFirst, _SrcLast]() mutable -> _Event
2723  {
2724  _Value_type *_DestPtr = reinterpret_cast<_Value_type*>(reinterpret_cast<char*>(_PDestBuf->_Get_host_ptr()) + (_Dest_offset * _PDestBuf->_Get_elem_size()));
2725  std::copy(_SrcFirst, _SrcLast, stdext::make_unchecked_array_iterator(_DestPtr));
2726 
2727  return _Event();
2728  }));
2729 
2730  return _Ev;
2731  }
2732  else
2733  {
2734  // _Dest is on a device. Let's create a temp staging buffer on the _Dest accelerator_view and copy the input over
2735  // We may create a staging buffer of size smaller than the copy size and in that case we will perform the copy
2736  // as a series of smaller copies
2737  _Buffer_ptr _PDestBuf = _Dst;
2738  size_t _NumElemsToCopyRemaining = _NumElemsToCopy;
2739  size_t _PreferredNumElemsToCopyPerChunk = _Preferred_copy_chunk_num_elems;
2740  if (_PreferredNumElemsToCopyPerChunk == 0) {
2741  // If a preferred copy chunk size was not specified, let's pick one based on the
2742  // size of the copy
2743  _PreferredNumElemsToCopyPerChunk = _Get_preferred_copy_chunk_num_elems(_NumElemsToCopy, sizeof(_Value_type));
2744  }
2745  size_t _CurrDstOffset = _Dest_offset;
2746  InputIterator _CurrStartIter = _SrcFirst;
2747  _Event _Ev;
2748 
2749  size_t _Lcm = _Least_common_multiple(_Dst->_Get_elem_size(), sizeof(_Value_type));
2750  size_t _AdjustmentRatio = _Lcm / sizeof(_Value_type);
2751 
2752  do
2753  {
2754  size_t _AllocationNumElems = _PreferredNumElemsToCopyPerChunk;
2755  if (_NumElemsToCopyRemaining < _AllocationNumElems) {
2756  _AllocationNumElems = _NumElemsToCopyRemaining;
2757  }
2758 
2759  _Buffer_ptr _PDestStagingBuf = _Buffer::_Get_temp_staging_buffer(_Dst->_Get_accelerator_view(),
2760  _AllocationNumElems, sizeof(_Value_type));
2761 
2762  _ASSERTE(_PDestStagingBuf != NULL);
2763  _ASSERTE(_PDestStagingBuf->_Get_elem_size() == sizeof(_Value_type));
2764 
2765  InputIterator _CurrEndIter = _CurrStartIter;
2766  size_t _CurrNumElemsToCopy = _AllocationNumElems;
2767  if (_CurrNumElemsToCopy > _PDestStagingBuf->_Get_num_elems()) {
2768  _CurrNumElemsToCopy = _PDestStagingBuf->_Get_num_elems();
2769  }
2770 
2771  if (_NumElemsToCopyRemaining <= _CurrNumElemsToCopy) {
2772  _CurrNumElemsToCopy = _NumElemsToCopyRemaining;
2773  _CurrEndIter = _SrcLast;
2774  }
2775  else
2776  {
2777  // We need to adjust the _CurrNumElemsToCopy to be a multiple of the
2778  // least common multiple of the destination buffer's element size and sizeof(_Value_type).
2779  _CurrNumElemsToCopy = (_CurrNumElemsToCopy / _AdjustmentRatio) * _AdjustmentRatio;
2780  std::advance(_CurrEndIter, _CurrNumElemsToCopy);
2781  }
2782 
2783  _ASSERTE((_CurrNumElemsToCopy % _AdjustmentRatio) == 0);
2784 
2785  // This would not actually block since we just created this staging buffer or are using
2786  // a cached one that is not in use
2787  _PDestStagingBuf->_Map_buffer(_Write_access, true /* _Wait */);
2788 
2789  // Copy from input to the staging using a raw pointer as OutputIterator
2790  std::copy(_CurrStartIter, _CurrEndIter, stdext::make_unchecked_array_iterator(reinterpret_cast<_Value_type*>(_PDestStagingBuf->_Get_host_ptr())));
2791 
2792  _Ev = _Ev._Add_event(_PDestStagingBuf->_Copy_to_async(_PDestBuf, _CurrNumElemsToCopy, 0, _CurrDstOffset));
2793 
2794  // Adjust the iterators and offsets
2795  _NumElemsToCopyRemaining -= _CurrNumElemsToCopy;
2796  _CurrDstOffset += (_CurrNumElemsToCopy * sizeof(_Value_type)) / _Dst->_Get_elem_size();
2797  _CurrStartIter = _CurrEndIter;
2798 
2799  } while (_NumElemsToCopyRemaining != 0);
2800 
2801  return _Ev;
2802  }
2803  }
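  // [Editorial note, illustrative values not from the original header] Worked example of the
  // chunk-size adjustment above: with sizeof(_Value_type) == 8 and a destination element size
  // of 12, the LCM is 24, so _AdjustmentRatio == 24 / 8 == 3 and every intermediate chunk is
  // rounded down to a multiple of 3 source elements (24 bytes); each such group then maps to
  // exactly (3 * 8) / 12 == 2 destination elements with no remainder.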
2804 
2805  // std::advance is only supported for input iterators (and stronger categories), hence we have
2806  // a custom implementation which forwards to std::advance if the iterator is an input iterator
2807  // and uses a loop-based advance implementation otherwise
2808  template<typename _InputIterator, typename _Distance>
2809  typename std::enable_if<std::is_base_of<std::input_iterator_tag, typename std::iterator_traits<_InputIterator>::iterator_category>::value>::type
2810  _Advance_output_iterator(_InputIterator &_Iter, _Distance _N)
2811  {
2812  std::advance(_Iter, _N);
2813  }
2814 
2815  template<typename _OutputIterator, typename _Distance>
2816  typename std::enable_if<!std::is_base_of<std::input_iterator_tag, typename std::iterator_traits<_OutputIterator>::iterator_category>::value>::type
2817  _Advance_output_iterator(_OutputIterator &_Iter, size_t _N)
2818  {
2819  for (size_t i = 0; i < _N; ++i)
2820  {
2821  _Iter++;
2822  }
2823  }
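  // [Editorial sketch, not part of the original header] How the two overloads above dispatch:
  // a raw pointer reports a random-access category (derived from std::input_iterator_tag) and
  // goes through std::advance, while std::back_insert_iterator reports std::output_iterator_tag
  // and falls back to the increment loop. The function name is illustrative only.
  inline void _Example_advance_output_iterator(int *_Ptr, std::vector<int> &_Vec)
  {
      _Advance_output_iterator(_Ptr, 4);               // input iterator path: std::advance
      auto _Inserter = std::back_inserter(_Vec);
      _Advance_output_iterator(_Inserter, size_t(4));  // output iterator path: loop of ++
  }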
2824 
2825  template <typename OutputIterator, typename _Value_type>
2826  inline _Event _Copy_impl(_In_ _Buffer *_Src, size_t _Src_offset, size_t _Num_elems,
2827  OutputIterator _DestIter, size_t _Preferred_copy_chunk_num_elems = 0)
2828  {
2829  if ((_Src == NULL) || ((_Src_offset + _Num_elems) > _Src->_Get_num_elems())) {
2830  throw runtime_exception("Failed to copy to buffer.", E_INVALIDARG);
2831  }
2832 
2833  if (_Num_elems == 0) {
2834  return _Event();
2835  }
2836 
2837  size_t _NumElemsToCopy = (_Num_elems * _Src->_Get_elem_size()) / sizeof(_Value_type);
2838 
2839  // If the src is host accessible for read then we do the copy on
2840  // accelerator(accelerator::cpu_accelerator).default_view
2841  if (_Src->_Is_host_accessible(_Read_access))
2842  {
2843  // Map the _Src buffer
2844  _Event _Ev = _Src->_Map_buffer_async(_Read_access);
2845 
2846  // The _Src is accessible on host. We just need to do a std::copy using a raw pointer as OutputIterator
2847  _Buffer_ptr _PSrcBuf = _Src;
2848  _Ev = _Ev._Add_continuation(std::function<_Event()>([_PSrcBuf, _Src_offset, _DestIter, _NumElemsToCopy]() mutable -> _Event
2849  {
2850  // The _Src is accessible on host. We just need to do a std::copy
2851  const _Value_type *_PFirst = reinterpret_cast<const _Value_type*>(reinterpret_cast<char*>(_PSrcBuf->_Get_host_ptr()) + (_Src_offset * _PSrcBuf->_Get_elem_size()));
2852  std::copy(_PFirst, _PFirst + _NumElemsToCopy, _DestIter);
2853 
2854  return _Event();
2855  }));
2856 
2857  return _Ev;
2858  }
2859  else
2860  {
2861  // The _Src is on the device. We need to copy it out to a temporary staging array
2862  // We may create a staging buffer of size smaller than the copy size and in that case we will
2863  // perform the copy as a series of smaller copies
2864 
2865  _Event _Ev;
2866 
2867  _Buffer_ptr _PSrcBuf = _Src;
2868  size_t _PreferredNumElemsToCopyPerChunk = _Preferred_copy_chunk_num_elems;
2869  if (_PreferredNumElemsToCopyPerChunk == 0) {
2870  // If a preferred copy chunk size was not specified, let's pick one based on the
2871  // size of the copy
2872  _PreferredNumElemsToCopyPerChunk = _Get_preferred_copy_chunk_num_elems(_NumElemsToCopy, sizeof(_Value_type));
2873  }
2874 
2875  size_t _AllocationNumElems = _PreferredNumElemsToCopyPerChunk;
2876  if (_NumElemsToCopy < _AllocationNumElems) {
2877  _AllocationNumElems = _NumElemsToCopy;
2878  }
2879 
2880  _Buffer_ptr _PSrcStagingBuf = _Buffer::_Get_temp_staging_buffer(_Src->_Get_accelerator_view(),
2881  _AllocationNumElems, sizeof(_Value_type));
2882 
2883  _ASSERTE(_PSrcStagingBuf != NULL);
2884  _ASSERTE(_PSrcStagingBuf->_Get_elem_size() == sizeof(_Value_type));
2885 
2886  // The total byte size of a copy chunk must be an integral multiple of both the
2887  // source buffer's element size and sizeof(_Value_type).
2888  size_t _Lcm = _Least_common_multiple(_Src->_Get_elem_size(), sizeof(_Value_type));
2889  size_t _AdjustmentRatio = _Lcm / sizeof(_Value_type);
2890 
2891  size_t _CurrNumElemsToCopy = _AllocationNumElems;
2892  if (_CurrNumElemsToCopy > _PSrcStagingBuf->_Get_num_elems()) {
2893  _CurrNumElemsToCopy = _PSrcStagingBuf->_Get_num_elems();
2894  }
2895  if (_NumElemsToCopy <= _CurrNumElemsToCopy)
2896  {
2897  _CurrNumElemsToCopy = _NumElemsToCopy;
2898  }
2899  else
2900  {
2901  // We need to adjust the _StagingBufNumElems to be a multiple of the
2902  // least common multiple of the source buffer's element size and sizeof(_Value_type).
2903  _CurrNumElemsToCopy = (_CurrNumElemsToCopy / _AdjustmentRatio) * _AdjustmentRatio;
2904  }
2905 
2906  _ASSERTE((_CurrNumElemsToCopy % _AdjustmentRatio) == 0);
2907 
2908  size_t _NumElemsToCopyRemaining = _NumElemsToCopy - _CurrNumElemsToCopy;
2909 
2910  _Ev = _PSrcBuf->_Copy_to_async(_PSrcStagingBuf, (_CurrNumElemsToCopy * sizeof(_Value_type)) / _PSrcBuf->_Get_elem_size(), _Src_offset, 0);
2911 
2912  if (_NumElemsToCopyRemaining != 0)
2913  {
2914  _Ev = _Ev._Add_continuation(std::function<_Event()>([_DestIter, _PSrcBuf, _PSrcStagingBuf,
2915  _CurrNumElemsToCopy, _NumElemsToCopyRemaining,
2916  _Src_offset, _PreferredNumElemsToCopyPerChunk]() mutable -> _Event
2917  {
2918  // Initiate an asynchronous copy of the remaining part so that this part of the copy
2919  // makes progress while we complete the copying of the first part
2920  size_t _CurrSrcOffset = _Src_offset + ((_CurrNumElemsToCopy * sizeof(_Value_type)) / _PSrcBuf->_Get_elem_size());
2921  OutputIterator _CurrDestIter = _DestIter;
2922  _Advance_output_iterator<decltype(_CurrDestIter), size_t>(_CurrDestIter, _CurrNumElemsToCopy);
2923  _Event _Ret_ev = _Copy_impl<OutputIterator, _Value_type>(_PSrcBuf._Get_ptr(), _CurrSrcOffset,
2924  (_NumElemsToCopyRemaining * sizeof(_Value_type)) / _PSrcBuf->_Get_elem_size(),
2925  _CurrDestIter, _PreferredNumElemsToCopyPerChunk);
2926 
2927  // Now copy the data from staging buffer to the destination
2928  _Value_type *_PFirst = reinterpret_cast<_Value_type*>(_PSrcStagingBuf->_Get_host_ptr());
2929  std::copy(_PFirst, _PFirst + _CurrNumElemsToCopy, _DestIter);
2930  return _Ret_ev;
2931  }));
2932  }
2933  else
2934  {
2935  _Ev = _Ev._Add_continuation(std::function<_Event()>([_DestIter, _PSrcStagingBuf, _CurrNumElemsToCopy]() mutable -> _Event
2936  {
2937  _Value_type *_PFirst = reinterpret_cast<_Value_type*>(_PSrcStagingBuf->_Get_host_ptr());
2938  std::copy(_PFirst, _PFirst + _CurrNumElemsToCopy, _DestIter);
2939  return _Event();
2940  }));
2941  }
2942 
2943  return _Ev;
2944  }
2945  }
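  // [Editorial note, illustrative values not from the original header] Flow of the
  // device-to-host path above, assuming matching element sizes: copying 10000 elements with a
  // 4096-element staging buffer first copies 4096 elements into staging, schedules a recursive
  // _Copy_impl for the remaining 5904 elements as a continuation, and finally drains the
  // staging buffer to the output iterator; the recursion repeats until no elements remain.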
2946 
2947  // Structured copy between buffers across AVs
2948  _AMPIMP _Event __cdecl _Copy_impl(_In_ _Buffer *_Src, _View_shape_ptr _Src_shape, _Out_ _Buffer * _Dst, _View_shape_ptr _Dst_shape);
2949 
2950  struct _Array_copy_desc
2951  {
2952  _Array_copy_desc(
2953  const unsigned int _Rank,
2954  const unsigned int _Src_linear_offset,
2955  const unsigned int * _Src_extents,
2956  const unsigned int * _Src_copy_offset,
2957  const unsigned int _Dst_linear_offset,
2958  const unsigned int * _Dst_extents,
2959  const unsigned int * _Dst_copy_offset,
2960  const unsigned int * _Copy_extents)
2961  {
2962  this->_Rank = _Rank;
2963 
2964  this->_Src_linear_offset = _Src_linear_offset;
2965  this->_Src_extents.assign( _Src_extents, _Src_extents + _Rank);
2966  this->_Src_copy_offset.assign( _Src_copy_offset, _Src_copy_offset + _Rank);
2967 
2968  this->_Dst_linear_offset = _Dst_linear_offset;
2969  this->_Dst_extents.assign( _Dst_extents, _Dst_extents + _Rank);
2970  this->_Dst_copy_offset.assign( _Dst_copy_offset, _Dst_copy_offset + _Rank);
2971 
2972  this->_Copy_extents.assign( _Copy_extents, _Copy_extents + _Rank);
2973  }
2974 
2976 
2977  unsigned int _Rank;
2978 
2979  // Shape of source
2980  unsigned int _Src_linear_offset;
2981  std::vector<unsigned int> _Src_extents;
2982  std::vector<unsigned int> _Src_copy_offset;
2983 
2984  // Shape of destination
2985  unsigned int _Dst_linear_offset;
2986  std::vector<unsigned int> _Dst_extents;
2987  std::vector<unsigned int> _Dst_copy_offset;
2988 
2989  // Shape of copy region
2990  std::vector<unsigned int> _Copy_extents;
2991  };
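  // [Editorial sketch, not part of the original header] Filling in the descriptor above for a
  // rank-2 copy with illustrative values: a 4 x 8 region taken at offset (1, 2) of a 16 x 16
  // source is copied to offset (0, 0) of an 8 x 8 destination. The function name is
  // illustrative only.
  inline _Array_copy_desc _Example_make_rank2_copy_desc()
  {
      const unsigned int _Src_ext[2] = {16, 16}, _Src_off[2] = {1, 2};
      const unsigned int _Dst_ext[2] = {8, 8},   _Dst_off[2] = {0, 0};
      const unsigned int _Copy_ext[2] = {4, 8};
      return _Array_copy_desc(2, /*_Src_linear_offset=*/0, _Src_ext, _Src_off,
                              /*_Dst_linear_offset=*/0, _Dst_ext, _Dst_off, _Copy_ext);
  }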
2992 
2993  // Declaration
2994  _AMPIMP HRESULT __cdecl _Recursive_array_copy(const _Array_copy_desc& _Desc,
2995  unsigned int _Native_copy_rank,
2996  std::function<HRESULT(const _Array_copy_desc &_Reduced)> _Native_copy_func);
2997 
2998  _AMPIMP std::pair<accelerator_view, accelerator_view> __cdecl _Get_src_dest_accelerator_view(_In_opt_ const _Buffer_descriptor *_SrcBuffDescPtr,
2999  _In_opt_ const _Buffer_descriptor *_DestBuffDescPtr);
3000 
3001  // Iterator based copy function
3002  template<typename _InputInterator, typename _OutputIterator>
3003  inline _Event _Copy_impl_iter(_InputInterator _SrcFirst, _InputInterator _SrcLast, _OutputIterator _DstFirst)
3004  {
3005  std::copy(_SrcFirst, _SrcLast, _DstFirst);
3006  return _Event();
3007  }
3008 
3009  // Iterator based copy function
3010  template <typename InputIterator, typename _Value_type>
3011  inline _Event _Copy_impl(InputIterator _SrcFirst, _View_shape_ptr _Src_shape, _Inout_ _Buffer * _Dst, _View_shape_ptr _Dst_shape)
3012  {
3013  _ASSERTE(_Dst != NULL);
3014  _ASSERTE(_Src_shape != NULL);
3015  _ASSERTE(_Dst_shape != NULL);
3016 
3017  if (_Src_shape->_Is_projection()) {
3018  _Src_shape = _Src_shape->_Get_reduced_shape_for_copy();
3019  }
3020 
3021  if (_Dst_shape->_Is_projection()) {
3022  _Dst_shape = _Dst_shape->_Get_reduced_shape_for_copy();
3023  }
3024 
3025  _ASSERTE(_Src_shape->_Get_rank() == _Dst_shape->_Get_rank());
3026 
3027  _ASSERTE(_View_shape::_Compare_extent_with_elem_size(_Src_shape->_Get_rank(), _Src_shape->_Get_view_extent(),
3028  sizeof(_Value_type), _Dst_shape->_Get_view_extent(), _Dst->_Get_elem_size()));
3029 
3030  if (_Dst->_Is_host_accessible(_Write_access))
3031  {
3032  // The destination buffer is accessible on the host. Map the _Dst buffer
3033  _Event _Ev = _Dst->_Map_buffer_async(_Write_access);
3034  _Buffer_ptr _PDestBuf = _Dst;
3035  return _Ev._Add_continuation(std::function<_Event()>([_SrcFirst, _Src_shape, _PDestBuf, _Dst_shape]() mutable -> _Event {
3036  return _Copy_impl_iter(_SrcFirst, _Src_shape, stdext::make_unchecked_array_iterator(reinterpret_cast<_Value_type*>(_PDestBuf->_Get_host_ptr())),
3037  _Create_reinterpreted_shape(_Dst_shape, _PDestBuf->_Get_elem_size(), sizeof(_Value_type)));
3038  }));
3039  }
3040  else
3041  {
3042  // The dest buffer is not accessible on host. Let's create a temporary
3043  // staging buffer on the destination buffer's accelerator_view
3044  _Buffer_ptr _PTempStagingBuf = _Buffer::_Create_stage_buffer(_Dst->_Get_accelerator_view(), accelerator(accelerator::cpu_accelerator).default_view,
3045  _Src_shape->_Get_view_size(), sizeof(_Value_type), true /* _Is_temp */);
3046 
3047  _PTempStagingBuf->_Map_buffer(_Write_access, true /* _Wait */);
3048  _Value_type *_Dst_ptr = reinterpret_cast<_Value_type*>(_PTempStagingBuf->_Get_host_ptr());
3049  _Event _Ev = _Copy_impl_iter(_SrcFirst, _Src_shape, stdext::make_unchecked_array_iterator(_Dst_ptr), _Src_shape);
3050 
3051  // Now copy from the staging buffer to the destination buffer
3052  _Buffer_ptr _PDestBuf = _Dst;
3053  return _Ev._Add_continuation(std::function<_Event()>([_PTempStagingBuf, _Src_shape, _PDestBuf, _Dst_shape]() mutable -> _Event {
3054  return _Copy_impl(_PTempStagingBuf, _Src_shape, _PDestBuf, _Dst_shape);
3055  }));
3056  }
3057  }
3058 
3059  template <typename OutputIterator, typename _Value_type>
3060  inline _Event _Copy_impl(_In_ _Buffer *_Src, _View_shape_ptr _Src_shape, OutputIterator _DestIter, _View_shape_ptr _Dst_shape)
3061  {
3062  _ASSERTE(_Src != NULL);
3063  _ASSERTE(_Src_shape != NULL);
3064  _ASSERTE(_Dst_shape != NULL);
3065 
3066  if (_Src_shape->_Is_projection()) {
3067  _Src_shape = _Src_shape->_Get_reduced_shape_for_copy();
3068  }
3069 
3070  if (_Dst_shape->_Is_projection()) {
3071  _Dst_shape = _Dst_shape->_Get_reduced_shape_for_copy();
3072  }
3073 
3074  _ASSERTE(_Src_shape->_Get_rank() == _Dst_shape->_Get_rank());
3075 
3076  _ASSERTE(_View_shape::_Compare_extent_with_elem_size(_Src_shape->_Get_rank(), _Src_shape->_Get_view_extent(),
3077  _Src->_Get_elem_size(), _Dst_shape->_Get_view_extent(), sizeof(_Value_type)));
3078 
3079  if (_Src->_Is_host_accessible(_Read_access))
3080  {
3081  // The source buffer is accessible on the host. Map the _Src buffer
3082  _Event _Ev = _Src->_Map_buffer_async(_Read_access);
3083 
3084  _Buffer_ptr _PSrcBuf = _Src;
3085  return _Ev._Add_continuation(std::function<_Event()>([_PSrcBuf, _Src_shape, _DestIter, _Dst_shape]() mutable -> _Event {
3086  return _Copy_impl_iter(reinterpret_cast<_Value_type*>(_PSrcBuf->_Get_host_ptr()),
3087  _Create_reinterpreted_shape(_Src_shape, _PSrcBuf->_Get_elem_size(), sizeof(_Value_type)),
3088  _DestIter, _Dst_shape);
3089  }));
3090  }
3091  else
3092  {
3093  // The source buffer is not accessible on host. Let's create a temporary
3094  // staging buffer on the source buffer's accelerator_view and initiate a copy
3095  // from the source buffer to the temporary staging buffer
3096  _Buffer_ptr _PTempStagingBuf = _Buffer::_Create_stage_buffer(_Src->_Get_accelerator_view(), accelerator(accelerator::cpu_accelerator).default_view,
3097  _Dst_shape->_Get_view_size(), sizeof(_Value_type), true);
3098 
3099  _Event _Ev = _Src->_Copy_to_async(_PTempStagingBuf, _Src_shape, _Dst_shape);
3100  return _Ev._Add_continuation(std::function<_Event()>([_PTempStagingBuf, _Dst_shape, _DestIter]() mutable -> _Event {
3101  return _Copy_impl_iter(reinterpret_cast<_Value_type*>(_PTempStagingBuf->_Get_host_ptr()),
3102  _Dst_shape, _DestIter, _Dst_shape);
3103  }));
3104  }
3105  }
3106 
3107  // Iterator based structured copy function
3108  template<typename _InputInterator, typename _OutputIterator>
3109  inline _Event _Copy_impl_iter(_InputInterator _SrcIter, _View_shape_ptr _Src_shape,
3110  _OutputIterator _DstIter, _View_shape_ptr _Dst_shape)
3111  {
3112  if (_Src_shape->_Is_projection()) {
3113  _Src_shape = _Src_shape->_Get_reduced_shape_for_copy();
3114  }
3115 
3116  if (_Dst_shape->_Is_projection()) {
3117  _Dst_shape = _Dst_shape->_Get_reduced_shape_for_copy();
3118  }
3119 
3120  _ASSERTE(_Src_shape->_Get_rank() == _Dst_shape->_Get_rank());
3121  _ASSERTE(_View_shape::_Compare_extent(_Src_shape->_Get_rank(), _Src_shape->_Get_view_extent(), _Dst_shape->_Get_view_extent()));
3122 
3123  // If both the _Src_shape and _Dst_shape are linear we can be more efficient
3124  unsigned int _Src_linear_offset, _Src_linear_size, _Dst_linear_offset, _Dst_linear_size;
3125  if (_Src_shape->_Is_view_linear(_Src_linear_offset, _Src_linear_size) &&
3126  _Dst_shape->_Is_view_linear(_Dst_linear_offset, _Dst_linear_size))
3127  {
3128  _ASSERTE(_Src_linear_size == _Dst_linear_size);
3129 
3130  // These iterators might be not contiguous, therefore we use std::advance
3131  std::advance(_SrcIter, _Src_linear_offset);
3132  auto _SrcLast = _SrcIter;
3133  std::advance(_SrcLast, _Src_linear_size);
3134  std::advance(_DstIter, _Dst_linear_offset);
3135 
3136  return _Copy_impl_iter(_SrcIter, _SrcLast, _DstIter);
3137  }
3138 
3139  std::vector<unsigned int> _Src_extent(_Src_shape->_Get_rank());
3140  std::vector<unsigned int> _Src_offset(_Src_shape->_Get_rank());
3141  std::vector<unsigned int> _Dst_extent(_Dst_shape->_Get_rank());
3142  std::vector<unsigned int> _Dst_offset(_Dst_shape->_Get_rank());
3143  std::vector<unsigned int> _Copy_extent(_Src_shape->_Get_rank());
3144 
3145  for (size_t i = 0; i < _Src_shape->_Get_rank(); ++i) {
3146  _Src_extent[i] = _Src_shape->_Get_base_extent()[i];
3147  _Src_offset[i] = _Src_shape->_Get_view_offset()[i];
3148  _Dst_extent[i] = _Dst_shape->_Get_base_extent()[i];
3149  _Dst_offset[i] = _Dst_shape->_Get_view_offset()[i];
3150  _Copy_extent[i] = _Src_shape->_Get_view_extent()[i];
3151  }
3152 
3153  _Array_copy_desc _Desc(
3154  _Src_shape->_Get_rank(),
3155  _Src_shape->_Get_linear_offset(),
3156  _Src_extent.data(),
3157  _Src_offset.data(),
3158  _Dst_shape->_Get_linear_offset(),
3159  _Dst_extent.data(),
3160  _Dst_offset.data(),
3161  _Copy_extent.data());
3162 
3163  // Note: Capturing the shape pointers would be incorrect; they are only valid for setting up
3164  // the call and might be deleted right after this call completes.
3165  HRESULT hr = _Recursive_array_copy(_Desc, 1, [_SrcIter, _DstIter](const _Array_copy_desc &_Reduced) -> HRESULT {
3166 
3167  auto _SrcFirst = _SrcIter;
3168  auto _DstFirst = _DstIter;
3169 
3170  std::advance(_DstFirst, _Reduced._Dst_linear_offset + _Reduced._Dst_copy_offset[0]);
3171  std::advance(_SrcFirst, _Reduced._Src_linear_offset + _Reduced._Src_copy_offset[0]);
3172  auto _SrcLast = _SrcFirst;
3173  std::advance(_SrcLast, _Reduced._Copy_extents[0]);
3174 
3175  std::copy(_SrcFirst, _SrcLast, _DstFirst);
3176 
3177  return S_OK;
3178  });
3179 
3180  if (FAILED(hr)) {
3181  throw Concurrency::runtime_exception("Failed to copy between buffers", E_FAIL);
3182  }
3183 
3184  return _Event();
3185  }
3186 
3187  // A ubiquitous buffer that provides access to the underlying data
3188  // on any accelerator_view
3189  class _Ubiquitous_buffer : public _Reference_counter
3190  {
3191  friend _Event _Get_access_async(const _View_key _Key, accelerator_view _Av, _Access_mode _Mode, _Buffer_ptr &_Buf_ptr);
3192  friend _AMPIMP accelerator_view __cdecl _Select_copy_src_accelerator_view(_In_ _View_key _Src_view_key, const accelerator_view &_Dest_accelerator_view);
3193  friend struct _DPC_call_handle;
3194 
3195  public:
3196 
3197  _AMPIMP static _Ret_ _Ubiquitous_buffer * __cdecl _Create_ubiquitous_buffer(size_t _Num_elems, size_t _Elem_size);
3198 
3199  _AMPIMP static _Ret_ _Ubiquitous_buffer * __cdecl _Create_ubiquitous_buffer(_Buffer_ptr _Master_buffer);
3200 
3201  // Register a new view on top of this _Ubiquitous_buffer
3202  _AMPIMP void _Register_view(_In_ _View_key _Key, accelerator_view _Cpu_av, _View_shape_ptr _Shape, _In_opt_ const _View_key _Source_view_key = nullptr);
3203 
3204  // Register a copy of an existing view registered with this _Ubiquitous_buffer
3205  _AMPIMP void _Register_view_copy(_In_ _View_key _New_view_key, _In_ _View_key _Existing_view_key);
3206 
3207  // Unregister a view currently registered with this _Ubiquitous_buffer
3208  _AMPIMP void _Unregister_view(_In_ _View_key _Key);
3209 
3210  // Obtain a specified mode of access to the specified view on the specified target
3211  // accelerator_view. This method also serves the purpose of determining the
3212  // amount of data copy expected to happen as part of this _Get_access request
3213  // without actually performing the copies or state updates in the _Ubiquitous_buffer. This
3214  // is used for reporting the implicit data copies that happen when accessing array_views
3215  // in C++ AMP ETW events
3216  _AMPIMP _Event _Get_access_async(_In_ _View_key _Key, _Accelerator_view_impl_ptr _Av_view_impl_ptr,
3217  _Access_mode _Mode, _Buffer_ptr &_Buf_ptr,
3218  _Inout_opt_ ULONGLONG *_Sync_size = nullptr);
3219 
3220  // Discard the content underlying this view
3221  _AMPIMP void _Discard(_In_ _View_key _Key);
3222 
3223  // This method does not synchronize the copies. Should not be used for getting
3224  // data access but only to get the underlying buffer's properties
3225  _AMPIMP _Buffer_ptr _Get_master_buffer() const;
3226 
3227  _AMPIMP accelerator_view _Get_master_accelerator_view() const;
3228 
3229  _AMPIMP _View_shape_ptr _Get_view_shape(_In_ _View_key _Key);
3230 
3231  _Ret_ _Accelerator_view_impl* _Get_master_accelerator_view_impl() const
3232  {
3233  return _M_master_av;
3234  }
3235 
3236  size_t _Get_master_buffer_elem_size() const
3237  {
3238  return _M_master_buffer_elem_size;
3239  }
3240 
3241  size_t _Get_master_buffer_num_elems() const
3242  {
3243  return _M_master_buffer_num_elems;
3244  }
3245 
3246  bool _Has_data_source() const
3247  {
3248  return _M_has_data_source;
3249  }
3250 
3251  private:
3252 
3253  // The _Ubiquitous_buffer constructors are private to force construction through the static
3254  // _Create_ubiquitous_buffer method to ensure the object is allocated in the runtime
3255  _Ubiquitous_buffer(size_t _Num_elems, size_t _Elem_size);
3256  _Ubiquitous_buffer(_In_ _Buffer* _Master_buffer);
3257 
3258  // Private destructor to force deletion through _Release
3259  ~_Ubiquitous_buffer();
3260 
3261  // No default constructor, copy constructor or assignment operator
3262  _Ubiquitous_buffer();
3263  _Ubiquitous_buffer(const _Ubiquitous_buffer &rhs);
3264  _Ubiquitous_buffer &operator=(const _Ubiquitous_buffer &rhs);
3265 
3266  // Helper methods
3267 
3268  // Get access to a buffer on a specified accelerator for a specified pre-registered view.
3269  // If _Sync_size parameter is not null, then function calculates number of bytes that we
3270  // need to synchronize to get desired access.
3271  _AMPIMP _Event _Get_access_async(_In_ _View_key _Key, accelerator_view _Av, _Access_mode _Mode,
3272  _Buffer_ptr &_Buf_ptr, _Inout_opt_ ULONGLONG *_Sync_size = NULL);
3273 
3274  // Commit a view to the master buffer if needed. When the _Sync_size parameter is non-null
3275  // this method just returns the amount of data to be copied as part of the commit, without
3276  // actually performing the commit
3277  _Event _Commit_view_async(_In_ _View_info *_Info, _Inout_ ULONGLONG *_Sync_size = nullptr);
3278 
3279  // Get the _Buffer_ptr corresponding to a specified accelerator_view. When the
3280  // _Create parameter is true, it creates a new _Buffer if one does not already exist
3281  // for that accelerator_view
3282  _Ret_ _Buffer* _Get_buffer(_In_ _Accelerator_view_impl* _Av, bool _Create = true);
3283 
3284  // Sets a new access mode for the specified view
3285  void _Set_new_access_mode(_Inout_ _View_info *_Info, _Access_mode _New_mode);
3286 
3287  // Unsets the discard flag from the specified view and all other
3288  // overlapping views
3289  void _Unset_discard_flag(_Inout_ _View_info *_Info);
3290 
3291  // Determines whether the data underlying the specified view has been discarded
3292  // based on whether a subsuming view has the discard flag set.
3293  bool _Should_discard(const _View_info *_Info, _In_opt_ const _View_key _Source_view_key = nullptr) const;
3294 
3295  // Does this view have exclusive data which is not discarded,
3296  // not on the master accelerator_view and also there is not other view
3297  // that subsumes this view and is marked dirty
3298  bool _Has_exclusive_data(const _View_info *_Info) const;
3299 
3300  // Based on the current state of overlapping views in the _Ubiquitous_buffer
3301  // does the specified view require a data update on the target accelerator_view
3302  // to fulfil an access request
3303  bool _Requires_update_on_target_accelerator_view(const _View_info *_Info,
3304  _Access_mode _Requested_mode,
3305  _In_ _Accelerator_view_impl* _Target_acclerator_view) const;
3306 
3307  // This method iterates over all views in the specified commit list
3308  // and flags them as "commit not needed" if that view is subsumed by another view present in the
3309  // commit list
3310  static void _Flag_redundant_commits(std::vector<std::pair<_View_info*, bool>> &_Commit_list);
3311 
3312  // This method returns the list of accelerator_views where the specified view already has
3313  // a valid cached copy of the data and getting read access would not incur any data movement.
3314  // The _Can_access_anywhere parameter is an output parameter used to indicate to the
3315  // caller that the specified view can be accessed on any accelerator_view without incurring
3316  // any data movement. This is true when there are no modified overlapping views that require
3317  // synchronization and the specified view has the discard_data flag set.
3318  // This method is used for determining the source accelerator_view for copy and p_f_e operations
3319  // involving array_views
3320  _Accelerator_view_unordered_set _Get_caching_info(_In_ _View_key _Key, _Out_opt_ bool *_Can_access_anywhere = NULL);
3321 
3322  _Accelerator_view_unordered_set _Get_caching_info_impl(_In_ _View_key _Key, _Out_opt_ bool *_Can_access_anywhere);
3323 
3324  _Ret_ _Accelerator_view_impl* _Determine_alternate_target_accelerator_view(_In_ _View_key _Key,
3325  _In_ _Accelerator_view_impl* _Original_av,
3326  _Access_mode _Mode);
3327 
3328  const _View_info * _Get_view_info_ptr(_In_ const _View_key key) const
3329  {
3330  auto const iterator = _M_view_map.find(key);
3331  return _M_view_map.end() == iterator ? nullptr : iterator->second;
3332  }
3333 
3334  private:
3335 
3336  // Private data
3337 
3338  // The master accelerator_view for this _Ubiquitous_buffer
3339  // which is specified at construction time
3340  _Accelerator_view_impl_ptr _M_master_av;
3341 
3342  // The master _Buffer corresponding to this _Ubiquitous_buffer
3343  // which is specified at construction time
3344  _Buffer_ptr _M_master_buffer;
3345 
3346  // The size of each element of the master buffer
3347  size_t _M_master_buffer_elem_size;
3348 
3349  // The number of elements in the master buffer
3350  size_t _M_master_buffer_num_elems;
3351 
3352  // Indicates if this ubiquitous buffer has an underlying data source
3353  bool _M_has_data_source;
3354 
3355  // A map of pre-created _Buffers corresponding to different
3356  // accelerator_views where the _Ubiquitous_buffer has already been
3357  // accessed
3358  std::map<_Accelerator_view_impl_ptr, _Buffer_ptr> _M_buffer_map;
3359 
3360  // A mapping between all registered view keys in this _Ubiquitous_buffer
3361  // to their corresponding _View_info
3362  std::unordered_map<_View_key, _View_info*> _M_view_map;
3363 
3364  // Set of distinct views of this buffer. As multiple copies of the same
3365  // view may have been registered for this _Ubiquitous_buffer, this set
3366  // maintains the set of distinct views which really matter for the
3367  // caching protocol. Also, note that some view_info may not have any live registered views
3368  // and hence does not exist in the _M_view_map, but may exist here since
3369  // it has uncommitted data which needs to be considered as part of the cache
3370  // coherence protocol to prevent modifications underlying this view from being lost
3371  std::unordered_set<_View_info*> _M_view_info_set;
3372 
3373  // Critical section object to protect the cache directory
3375  };
3376 
3377  // Class defines functions for interoperability with D3D
3378  class _D3D_interop
3379  {
3380  public:
3381  _AMPIMP static _Ret_ IUnknown * __cdecl _Get_D3D_buffer(_In_ _Buffer *_Buffer_ptr);
3382  _AMPIMP static _Ret_ IUnknown * __cdecl _Get_D3D_texture(_In_ _Texture *_Texture_ptr);
3383  _AMPIMP static _Ret_ void * __cdecl _Get_D3D_sampler_data_ptr(_In_ IUnknown *_D3D_sampler);
3384  _AMPIMP static void __cdecl _Release_D3D_sampler_data_ptr(_In_ void *_Sampler_data_ptr);
3385  _AMPIMP static _Ret_ IUnknown * __cdecl _Get_D3D_sampler(const Concurrency::accelerator_view &_Av, _In_ _Sampler *_Sampler_ptr);
3386  };
3387 
3388  inline
3389  _Event _Get_access_async(const _View_key _Key, accelerator_view _Av, _Access_mode _Mode, _Buffer_ptr &_Buf_ptr)
3390  {
3391  return _Key->_Get_buffer_ptr()->_Get_access_async(_Key->_Get_view_key(), _Av, _Mode, _Buf_ptr);
3392  }
3393 
3394  inline
3395  _View_shape_ptr _Get_buffer_view_shape(const _Buffer_descriptor& _Descriptor)
3396  {
3397  return _Descriptor._Get_buffer_ptr()->_Get_view_shape(_Descriptor._Get_view_key());
3398  }
3399 
3400  inline
3401  bool _Is_cpu_accelerator(const accelerator& _Accl)
3402  {
3403  return (_Accl.device_path == accelerator::cpu_accelerator);
3404  }
3405 
3406 } // namespace Concurrency::details
3407 
3408 } // namespace Concurrency
3409 
3410 // =+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+
3411 //
3412 // Compiler/Runtime Interface
3413 //
3414 // =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
3415 
3416 #define HELPERAPI __cdecl
3417 
3418 using namespace Concurrency::details;
3419 
3420 extern "C" {
3421 
3422  // This structure is used for storing information about resources required by the kernel.
3423  enum _Resource_kind
3424  {
3425  RESOURCE_BUFFER,
3426  RESOURCE_TEXTURE,
3427  RESOURCE_SAMPLER
3428  };
3429 
3430  struct _Device_resource_info
3431  {
3432  _Resource_kind _M_resource_kind; // buffer, texture, or sampler
3433 
3434  void * _M_desc; // Pointer to the _Buffer_descriptor/_Texture_descriptor/_Sampler_descriptor instance
3435  // which underlies the device resource
3436 
3437  _Access_mode _M_formal_access_mode; // scalar: read-only
3438  // const scalar ref: read-only
3439  // scalar ref: ReadWrite
3440  // array: ReadWrite
3441  // const array: ReadOnly
3443 
3444  BOOL _Is_buffer() const
3445  {
3446  return (_M_resource_kind == RESOURCE_BUFFER);
3447  }
3448 
3449  BOOL _Is_texture() const
3450  {
3451  return (_M_resource_kind == RESOURCE_TEXTURE);
3452  }
3453 
3454  BOOL _Is_sampler() const
3455  {
3456  return (_M_resource_kind == RESOURCE_SAMPLER);
3457  }
3458 
3459  _Ret_ _Buffer_descriptor * _Get_buffer_desc() const
3460  {
3461  _ASSERTE(_Is_buffer());
3462  return reinterpret_cast<_Buffer_descriptor *>(_M_desc);
3463  }
3464 
3465  _Ret_ _Texture_descriptor * _Get_texture_desc() const
3466  {
3467  _ASSERTE(_Is_texture());
3468  return reinterpret_cast<_Texture_descriptor *>(_M_desc);
3469  }
3470 
3471  _Ret_ _Sampler_descriptor * _Get_sampler_desc() const
3472  {
3473  _ASSERTE(_Is_sampler());
3474  return reinterpret_cast<_Sampler_descriptor *>(_M_desc);
3475  }
3476 
3477  _Ret_ void * _Get_resource_ptr() const
3478  {
3479  if (_Is_buffer())
3480  {
3481  _Ubiquitous_buffer * _Tmp = _Get_buffer_desc()->_Get_buffer_ptr();
3482  return reinterpret_cast<void *>(_Tmp);
3483  }
3484  else if (_Is_texture())
3485  {
3486  _Texture * _Tmp = _Get_texture_desc()->_Get_texture_ptr();
3487  return reinterpret_cast<void *>(_Tmp);
3488  }
3489  else
3490  {
3491  _ASSERTE(_Is_sampler());
3492  _Sampler * _Tmp = _Get_sampler_desc()->_Get_sampler_ptr();
3493  return reinterpret_cast<void *>(_Tmp);
3494  }
3495  }
3496  };
3497 
3498  // This structure is used for storing information about the const buffers
3499  struct _Device_const_buffer_info
3500  {
3501  void * _M_data; // Pointer to the host data to initialize the
3502  // constant buffer with
3503 
3504  size_t _M_const_buf_size; // Size of the const buffer in bytes
3505 
3506  unsigned int _M_is_debug_data; // Is this debug data which will be
3507  // initialized by the runtime. 0 (false), 1 (true)
3508  };
3509 }
3510 
3511 namespace Concurrency
3512 {
3513 namespace details
3514 {
3515  struct _DPC_call_handle
3516  {
3517  _Accelerator_view_impl *_M_rv;
3518  bool _M_is_explicit_target_acclview;
3519 
3520  // Info about the kernel function arguments
3521  _Device_resource_info * _M_device_resource_info;
3522  size_t _M_num_resources;
3523  size_t _M_num_writable_buffers;
3524  size_t _M_num_samplers;
3525 
3526  // Info about the host buffer created corresponding to the const buffer
3527  _Device_const_buffer_info * _M_const_buffer_info;
3528  size_t _M_num_const_buffers;
3529 
3530  bool _M_RW_aliasing;
3531 
3532  // Kernel funcs
3533  _DPC_shader_blob * _M_shader_blob;
3534 
3535  // Compute domain info
3537  unsigned int _M_compute_rank;
3538  unsigned int * _M_grid_extents;
3539 
3540  // Kernel dispatch info
3541  unsigned int _M_groupCountX;
3542  unsigned int _M_groupCountY;
3543  unsigned int _M_groupCountZ;
3544 
3545  // The shape of the group
3546  unsigned int _M_groupExtentX;
3547  unsigned int _M_groupExtentY;
3548  unsigned int _M_groupExtentZ;
3549 
3550  _DPC_call_handle(const accelerator_view &_Accelerator_view)
3551  {
3552  if (!_Accelerator_view.is_auto_selection) {
3553  _M_rv = _Get_accelerator_view_impl_ptr(_Accelerator_view);
3554  }
3555  else {
3556  _M_rv = NULL;
3557  }
3558 
3559  _M_is_explicit_target_acclview = false;
3560  if (_M_rv != NULL) {
3561  _M_is_explicit_target_acclview = true;
3562  }
3563 
3564  _M_device_resource_info = NULL;
3565  _M_num_resources = 0;
3566  _M_num_writable_buffers = 0;
3567  _M_num_samplers = 0;
3568 
3569  _M_const_buffer_info = NULL;
3570  _M_num_const_buffers = 0;
3571 
3572  _M_RW_aliasing = false;
3573 
3574  _M_shader_blob = NULL;
3575 
3576  _M_is_flat_model = 0;
3577  _M_compute_rank = 0;
3578  _M_grid_extents = NULL;
3579 
3580  _M_groupCountX = 0;
3581  _M_groupCountY = 0;
3582  _M_groupCountZ = 0;
3583 
3584  _M_groupExtentX = 0;
3585  _M_groupExtentY = 0;
3586  _M_groupExtentZ = 0;
3587  }
3588 
3589  ~_DPC_call_handle()
3590  {
3591  if (_M_grid_extents) {
3592  delete [] _M_grid_extents;
3593  }
3594  }
3595 
3596  bool _Is_buffer_aliased(_In_ void* const _Buffer_ptr) const
3597  {
3598  return ((_M_aliased_buffer_set != nullptr) && (_M_aliased_buffer_set->find(_Buffer_ptr) != _M_aliased_buffer_set->end()));
3599  }
3600 
3601  bool _Is_buffer_unaccessed(size_t const _Buffer_idx) const
3602  {
3603  return ((_M_is_device_buffer_unaccessed != nullptr) && _M_is_device_buffer_unaccessed->operator[](_Buffer_idx));
3604  }
3605 
3606  void _Set_buffer_unaccessed(size_t _Buffer_idx)
3607  {
3608  if (_M_is_device_buffer_unaccessed == nullptr) {
3609  _M_is_device_buffer_unaccessed = std::unique_ptr<std::vector<bool>>(new std::vector<bool>(_M_num_resources, false));
3610  }
3611 
3612  _M_is_device_buffer_unaccessed->operator[](_Buffer_idx) = true;
3613  }
3614 
3615  const int* _Get_redirect_indices() const
3616  {
3617  if (!_M_RW_aliasing) {
3618  return nullptr;
3619  }
3620 
3621  _ASSERTE(_M_Redirect_indices != nullptr);
3622 
3623  return _M_Redirect_indices->data();
3624  }
3625 
3626  void _Check_buffer_aliasing();
3627  void _Update_buffer_rw_property();
3628  void _Setup_aliasing_redirection_indices();
3629  void _Select_accelerator_view();
3630  void _Verify_buffers_against_accelerator_view();
3631 
3632  private:
3633  std::unique_ptr<std::unordered_set<void*>> _M_aliased_buffer_set;
3634  std::unique_ptr<std::vector<bool>> _M_is_device_buffer_unaccessed;
3635  // Info about read-write aliasing
3636  std::unique_ptr<std::vector<int>> _M_Redirect_indices;
3637  };
3638 
3639  // This structure is used for passing the scheduling
3640  // info to the parallel_for_each which is handed back
3641  // to the compiler-runtime interface methods by the front-end
3643  {
3644  // The accelerator view to invoke a parallel_for_each on
3645  accelerator_view _M_accelerator_view;
3646  };
3647 
3648 } // namespace Concurrency::details
3649 
3650 
3661 _AMPIMP void __cdecl amp_uninitialize();
3662 
3663 } // namespace Concurrency
3664 
3665 extern "C" {
3666 
3667  // Return a compiler helper handle.
3669 
3670  // Destroy the call handle
3672 
3673  _AMPIMP void HELPERAPI __dpc_set_device_resource_info(_In_ _DPC_call_handle * _Handle, _In_ _Device_resource_info * _DeviceResourceInfo, size_t _NumResources) throw(...);
3674 
3675  // Set const buffer info.
3676  _AMPIMP void HELPERAPI __dpc_set_const_buffer_info(_In_ _DPC_call_handle * _Handle, _In_ _Device_const_buffer_info * _DeviceConstBufferInfo, size_t _NumConstBuffers) throw(...);
3677 
3678  // Set the kernel shader info
3679  _AMPIMP void HELPERAPI __dpc_set_kernel_shader_info(_In_ _DPC_call_handle * _Handle,
3680  _Inout_ void * _ShaderBlobs) throw(...);
3681  // Set kernel dispatch info
3682  _AMPIMP void HELPERAPI __dpc_set_kernel_dispatch_info(_In_ _DPC_call_handle * _Handle,
3683  unsigned int _ComputeRank,
3684  _In_ int * _Extents,
3685  unsigned int _GroupRank,
3686  const unsigned int * _GroupExtents,
3687  unsigned int & _GroupCountX,
3688  unsigned int & _GroupCountY,
3689  unsigned int & _GroupCountZ) throw(...);
3690 
3691  // Dispatch the kernel
3692  _AMPIMP void HELPERAPI __dpc_dispatch_kernel(_In_ _DPC_call_handle * _Handle) throw(...);
3693 
3694 #ifdef _DEBUG
3695  // Dispatch the kernel passed as a HLSL source level shader
3696  // This function is to be used only for testing and debugging purposes
3697  _AMPIMP void HELPERAPI __dpc_dispatch_kernel_test(_In_ _DPC_call_handle * _Handle, _In_ WCHAR* szFileName, LPCSTR szEntryPoint) throw(...);
3698 #endif
3699 }
3700 
3701 // =+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+
3702 //
3703 // C++ AMP ETW Provider
3704 //
3705 // =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
3706 
3707 namespace Concurrency
3708 {
3709 namespace details
3710 {
3711 
3712 // Thread-safe factory method for _Amp_runtime_trace object
3713 _AMPIMP _Ret_ _Amp_runtime_trace* __cdecl _Get_amp_trace();
3714 
3715 // Class that gathers C++ AMP diagnostic information and triggers events
3716 class _Amp_runtime_trace
3717 {
3718 
3719 // Called by factory to create single instance of _Amp_runtime_trace type
3720 friend BOOL CALLBACK _Init_amp_runtime_trace(PINIT_ONCE _Init_once, PVOID _Param, _Inout_ PVOID *_Context);
3721 
3722 public:
3723  // Destructor for _Amp_runtime_trace, called at program termination
3724  _AMPIMP ~_Amp_runtime_trace();
3725 
3726  // End event is triggered by multiple other events such as StartComputeEvent to show exactly when a given activity completed
3727  _AMPIMP void _Write_end_event(ULONG _Span_id);
3728 
3729  // Add accelerator configuration information
3730  // Note: This member function does not have to be exported, it is used by C++ AMP runtime factory
3731  void _Add_accelerator_config_event(PVOID _Accelerator_id, LPCWSTR _Device_path, LPCWSTR _Device_description);
3732 
3733  // Used by callback function, to write all configuration data when new session is detected
3734  // Note: This member function does not have to be exported, it is used by C++ AMP runtime factory
3735  void _Write_all_accelerator_config_events();
3736 
3737  // Started accelerator_view::wait operation
3738  // Note: This member function does not have to be exported, it is used by C++ AMP runtime factory
3739  ULONG _Start_accelerator_view_wait_event(PVOID _Accelerator_id, PVOID _Accelerator_view_id);
3740 
3741  // Launched accelerator_view::flush operation
3742  // Note: This member function does not have to be exported, it is used by C++ AMP runtime factory
3743  void _Launch_flush_event(PVOID _Accelerator_id, PVOID _Accelerator_view_id);
3744 
3745  // Launched accelerator_view::create_marker operation
3746  // Note: This member function does not have to be exported, it is used by C++ AMP runtime factory
3747  ULONG _Launch_marker(PVOID _Accelerator_id, PVOID _Accelerator_view_id);
3748 
3749  // Below are set of helpers that take various types that were available at event injection point and extract all necessary data
3750  _AMPIMP ULONG _Start_parallel_for_each_event_helper(_In_ _DPC_call_handle *_Handle);
3751 
3752  // This helper wraps functor with wait start and wait end events
3753  completion_future _Start_async_op_wait_event_helper(ULONG _Async_op_id, _Event _Ev)
3754  {
3755  std::shared_future<void> retFuture;
3756  concurrency::task_completion_event<void> retTaskCompletionEvent;
3757 
3758  // Create a std::shared_future by creating a deferred task through std::async that waits for the
3759  // event _Ev to finish. Wrap functor with start and end events
3760  retFuture = std::async(std::launch::deferred, [=]() mutable {
3761  try
3762  {
3763  if (_Async_op_id == _Amp_runtime_trace::_M_event_disabled)
3764  {
3765  _Ev._Get();
3766  }
3767  else
3768  {
3769  auto _Span_id = details::_Get_amp_trace()->_Start_async_op_wait_event(_Async_op_id);
3770  _Ev._Get();
3771  details::_Get_amp_trace()->_Write_end_event(_Span_id);
3772  }
3773  }
3774  catch(...)
3775  {
3776  // If an exception is encountered when executing the asynchronous operation
3777  // we should set the exception on the retTaskCompletionEvent so that it is
3778  // appropriately cancelled and the exception is propagated to continuations
3779  retTaskCompletionEvent.set_exception(std::current_exception());
3780  throw;
3781  }
3782 
3783  retTaskCompletionEvent.set();
3784  });
3785 
3786  // Register the async event with the runtime asynchronous events manager
3787  _Register_async_event(_Ev, retFuture);
3788 
3789  // Let's issue a continuation just to swallow any exceptions that are encountered during the
3790  // async operation and are never observed by the user or are just observed through the
3791  // shared_future and not through the task
3792  concurrency::task<void> retTask(retTaskCompletionEvent);
3793  retTask.then([](concurrency::task<void> _Task) {
3794  try {
3795  _Task.get();
3796  }
3797  catch(...) {
3798  }
3799  });
3800 
3801  return Concurrency::completion_future(retFuture, retTask);
3802  }
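  // [Editorial sketch, not part of the original header] The pattern used above, reduced to its
  // skeleton: a deferred std::async future performs the blocking wait, a task_completion_event
  // mirrors the outcome into a PPL task, and a continuation observes any exception so it is
  // never left unobserved. _Wait is a hypothetical stand-in for _Ev._Get().
  //
  //   concurrency::task_completion_event<void> _Tce;
  //   std::shared_future<void> _Fut = std::async(std::launch::deferred, [=]() mutable {
  //       try { _Wait(); }
  //       catch (...) { _Tce.set_exception(std::current_exception()); throw; }
  //       _Tce.set();
  //   });
  //   concurrency::task<void> _Task(_Tce);
  //   _Task.then([](concurrency::task<void> _T) { try { _T.get(); } catch (...) {} });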
3803 
3804  _AMPIMP ULONG _Start_array_view_synchronize_event_helper(const _Buffer_descriptor &_Buff_desc);
3805  _AMPIMP ULONG _Launch_array_view_synchronize_event_helper(const _Buffer_descriptor &_Buff_desc);
3806 
3807  // Helpers for buffers (array, array_view)
3808  _AMPIMP ULONG _Start_copy_event_helper(const _Buffer_descriptor &_Src, const _Buffer_descriptor &_Dest, ULONGLONG _Num_bytes_for_copy);
3809  _AMPIMP ULONG _Start_copy_event_helper(nullptr_t, const _Buffer_descriptor &_Dest, ULONGLONG _Num_bytes_for_copy);
3810  _AMPIMP ULONG _Start_copy_event_helper(const _Buffer_descriptor &_Src, nullptr_t, ULONGLONG _Num_bytes_for_copy);
3811  _AMPIMP ULONG _Launch_async_copy_event_helper(const _Buffer_descriptor &_Src, const _Buffer_descriptor &_Dest, ULONGLONG _Num_bytes_for_copy);
3812  _AMPIMP ULONG _Launch_async_copy_event_helper(nullptr_t, const _Buffer_descriptor &_Dest, ULONGLONG _Num_bytes_for_copy);
3813  _AMPIMP ULONG _Launch_async_copy_event_helper(const _Buffer_descriptor &_Src, nullptr_t, ULONGLONG _Num_bytes_for_copy);
3814 
3815  // Helper for textures
3816  _AMPIMP ULONG _Start_copy_event_helper(const _Texture_descriptor &_Src, nullptr_t, ULONGLONG _Num_bytes_for_copy);
3817  _AMPIMP ULONG _Start_copy_event_helper(nullptr_t, const _Texture_descriptor &_Dest, ULONGLONG _Num_bytes_for_copy);
3818  _AMPIMP ULONG _Start_copy_event_helper(const _Texture_descriptor &_Src, const _Texture_descriptor &_Dest, ULONGLONG _Num_bytes_for_copy);
3819  _AMPIMP ULONG _Launch_async_copy_event_helper(const _Texture_descriptor &_Src, nullptr_t, ULONGLONG _Num_bytes_for_copy);
3820  _AMPIMP ULONG _Launch_async_copy_event_helper(nullptr_t, const _Texture_descriptor &_Dest, ULONGLONG _Num_bytes_for_copy);
3821  _AMPIMP ULONG _Launch_async_copy_event_helper(const _Texture_descriptor &_Src, const _Texture_descriptor &_Dest, ULONGLONG _Num_bytes_for_copy);
3822 
3823  void _Enable_provider(bool _Enable = true);
3824 
3825 private:
3826  // Private constructor. This type is created by factory method
3827  _Amp_runtime_trace(PVOID _Callback_function, _In_ _Trace *_Trace);
3828 
3829  // Disallow copy construction
3830  _Amp_runtime_trace(const _Amp_runtime_trace &_Other);
3831 
3832  // Disallow assignment operator
3833  _Amp_runtime_trace &operator=(const _Amp_runtime_trace &_Other);
3834 
3835  // Used internally to write configuration events
3836  void _Write_accelerator_config_event(const std::tuple<PVOID, LPCWSTR, LPCWSTR> &_ConfigTuple);
3837 
3838  // Event triggered when computation is scheduled
3839  ULONG _Start_parallel_for_each_event(
3840  PVOID _Accelerator_id,
3841  PVOID _Accelerator_view_id,
3842  BOOL _Is_tiled_explicitly,
3843  ULONGLONG _Num_of_tiles,
3844  ULONG _Num_of_threads_per_tile,
3845  BOOL _Is_aliased,
3846  ULONG _Num_read_only_resources,
3847  ULONG _Num_read_write_resources,
3848  ULONGLONG _Size_of_all_resouces,
3849  ULONG _Size_of_const_data,
3850  ULONGLONG _Size_of_data_for_copy);
3851 
3852  // Synchronous copy operation has started
3853  ULONG _Start_copy_event(
3854  PVOID _Src_accelerator_id,
3855  PVOID _Src_accelerator_view_id,
3856  PVOID _Dst_accelerator_id,
3857  PVOID _Dst_accelerator_view_id,
3858  ULONGLONG _Num_bytes_for_copy,
3859  BOOL _Is_src_staging,
3860  BOOL _Is_dst_staging);
3861 
3862  // Asynchronous copy operation has been launched
3863  ULONG _Launch_async_copy_event(
3864  PVOID _Src_accelerator_id,
3865  PVOID _Src_accelerator_view_id,
3866  PVOID _Dst_accelerator_id,
3867  PVOID _Dst_accelerator_view_id,
3868  ULONGLONG _Num_bytes_for_copy,
3869  BOOL _Is_src_staging,
3870  BOOL _Is_dst_staging);
3871 
3872  // Started waiting for asynchronous operation to complete
3873  _AMPIMP ULONG _Start_async_op_wait_event(ULONG _Async_op_id);
3874 
3875  // Started array_view::synchronize operation
3876  ULONG _Start_array_view_synchronize_event(ULONGLONG _Num_bytes_to_synchronize);
3877 
3878  // Async array_view::synchronize operation has been launched
3879  ULONG _Launch_array_view_synchronize_event(ULONGLONG _Num_bytes_to_synchronize);
3880 
3881  // Helper function that extracts information from buffer descriptor
3882  std::tuple<PVOID, PVOID, BOOL> _Get_resource_diagnostic_info(const _Buffer_descriptor &_Buff_desc, accelerator_view _Accl_view) const;
3883 
3884  // Helper function that extracts information from texture descriptor
3885  std::tuple<PVOID, PVOID, BOOL> _Get_resource_diagnostic_info(const _Texture_descriptor &_Tex_desc) const;
3886 
3887  // Generates unique identifiers for span_id and async_op_id
3888  ULONG _Get_unique_identifier();
3889 
3890  // Critical section object used by the callback function to synchronize the following situations:
3891  // a) multiple sessions have started at the same time
3892  // b) C++ AMP Runtime factory adds new accelerator config event to the collection
3893  Concurrency::critical_section _M_critical_section;
3894 
3895  // Collection of all configuration events at the time of C++ AMP Runtime initialization
3896  std::vector<std::tuple<PVOID, LPCWSTR, LPCWSTR>> _M_accelerator_configs;
3897 
3898  // Unique counter for span id and async operation id
3899  volatile ULONG _M_counter;
3900 
3901  // Type that implements the ITrace interface and writes events, e.g. ETW events
3902  _Trace* _M_trace_ptr;
3903 
3904  // Special value that we return to chain events if provider is disabled
3905  static const ULONG _M_event_disabled = 0;
3906 };
3907 
3908 // Helper function to query the number of mipmap levels from a texture object
3909 inline unsigned int _Get_mipmap_levels(const _Texture *_Tex)
3910 {
3911  _ASSERTE(_Tex);
3912  return _Tex->_Get_mip_levels();
3913 }
3914 
3915 } // namespace Concurrency::details
3916 } // namespace Concurrency
3917 
3918 namespace concurrency = Concurrency;
3919 
3920 #pragma pack(pop)
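A minimal usage sketch (editorial, not part of amprt.h) of the public Concurrency::completion_future
type that the async-op helpers above construct: an asynchronous copy is launched with copy_async,
a no-argument callback is chained with then(), and get() blocks until completion and rethrows any
exception stored in the underlying std::shared_future<void>. The buffer size and the free function
name are illustrative assumptions, and a C++ AMP capable accelerator is assumed at run time.

#include <amp.h>
#include <vector>
#include <iostream>

void completion_future_usage_sketch()
{
    std::vector<int> src(1024, 1);
    concurrency::array<int, 1> dst(1024);

    // copy_async returns a Concurrency::completion_future for the asynchronous copy.
    concurrency::completion_future cf =
        concurrency::copy_async(src.begin(), src.end(), dst);

    // then() chains a callback that runs when the associated operation completes.
    cf.then([] { std::wcout << L"copy completed" << std::endl; });

    // get() waits for completion and rethrows any exception recorded by the runtime.
    cf.get();
}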