amprt.h
1 /***
2 * ==++==
3 *
4 * Copyright (c) Microsoft Corporation. All rights reserved.
5 *
6 * ==--==
7 * =+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+
8 *
9 * amprt.h
10 *
11 * Define the C++ interfaces exported by the C++ AMP runtime
12 *
13 * =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
14 ****/
15 #pragma once
16 
17 #if !(defined (_M_X64) || defined (_M_IX86) || defined (_M_ARM) || defined (_M_ARM64) )
18  #error ERROR: C++ AMP runtime is supported only on X64, X86, ARM, and ARM64 architectures.
19 #endif
20 
21 #if defined (_M_CEE)
22  #error ERROR: C++ AMP runtime is not supported when compiling /clr.
23 #endif
24 
25 #ifndef __cplusplus
26  #error ERROR: C++ AMP runtime is supported only for C++.
27 #endif
28 
29 #if !defined(_CXXAMP)
30 
31 #if defined(_DEBUG)
32  #pragma comment(lib, "vcampd")
33 #else // _DEBUG
34  #pragma comment(lib, "vcamp")
35 #endif // _DEBUG
36 
37 #endif // _CXXAMP
38 
39 #if !defined(_CXXAMP)
40 
41 #define __GPU restrict(amp,cpu)
42 #define __GPU_ONLY restrict(amp)
43 #define __CPU_ONLY
44 
45 #else
46 
47 #define __GPU
48 #define __GPU_ONLY
49 #define __CPU_ONLY
50 
51 #endif // _CXXAMP
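// Illustrative sketch (not part of amprt.h): when _CXXAMP is not defined, __GPU
// expands to restrict(amp,cpu), so a function annotated with it is callable from
// both host code and amp-restricted device code. The helper name is hypothetical.
inline int _Example_clamp_to_zero(int _X) __GPU
{
    return (_X < 0) ? 0 : _X;
}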
52 
53 #include <exception>
54 #include <unknwn.h>
55 #include <crtdbg.h>
56 #include <string>
57 #include <vector>
58 
59 #if defined(_CXXAMP)
60 #include <strsafe.h>
61 #endif // _CXXAMP
62 
63 #include <future>
64 #include <functional>
65 #include <map>
66 #include <unordered_map>
67 #include <set>
68 #include <unordered_set>
69 #include <concrt.h>
70 #include <type_traits>
71 
72 #if !defined(_AMPIMP)
73 #define _AMPIMP __declspec(dllimport)
74 #endif
75 
76 #pragma pack(push,8)
77 
78 // Part of runtime-compiler interface
79 extern "C"
80 {
81  // Access mode of fields
82  enum _Access_mode
83  {
84  _No_access = 0,
85  _Read_access = (1 << 0),
86  _Write_access = (1 << 1),
87  _Is_array_mode = (1 << 30),
88  _Read_write_access = _Read_access | _Write_access,
89  };
90 }
91 
92 namespace Concurrency
93 {
97  enum access_type
98  {
99  access_type_none = 0,
100  access_type_read = (1 << 0),
101  access_type_write = (1 << 1),
102  access_type_read_write = access_type_read | access_type_write,
103  access_type_auto = (1 << 31),
104  };
105 
106 // Forward declarations
107 class accelerator_view;
108 class accelerator;
109 
110 namespace details
111 {
112  const size_t ERROR_MSG_BUFFER_SIZE = 1024;
113 
114  // A reference counter to be used as the base class for all reference counted types.
115  class _Reference_counter
116  {
117  public:
118 
119  // Constructor.
120  _Reference_counter() : _M_rc(0) {}
121 
122  // Destructor.
123  virtual ~_Reference_counter() {}
124 
125  // Add a reference.
126  // Thread-safe.
127  size_t _Add_reference()
128  {
129  return InterlockedIncrement(reinterpret_cast<LONG volatile*>(&_M_rc));
130  }
131 
132  // Remove a reference.
133  // Thread-safe.
134  size_t _Remove_reference()
135  {
136  _ASSERTE(_M_rc > 0);
137 
138  size_t refCount = InterlockedDecrement(reinterpret_cast<LONG volatile*>(&_M_rc));
139 
140  if (refCount == 0)
141  this->_Release();
142 
143  return refCount;
144  }
145 
146  // Release the counter
147  _AMPIMP void _Release();
148 
149  // Return the reference count value
150  size_t _Get_reference_count()
151  {
152  return _M_rc;
153  }
154 
155  private:
156  size_t _M_rc;
157  };
158 
159  // A smart pointer to a reference counted object
160  // T must be a type derived from _Reference_counter
161  template <class T>
162  class _Reference_counted_obj_ptr
163  {
164  public:
165 
166  // Constructor
167  _Reference_counted_obj_ptr(T* _Ptr = NULL) : _M_obj_ptr(_Ptr)
168  {
169  _Init();
170  }
171 
172  // Copy constructor
173  _Reference_counted_obj_ptr(const _Reference_counted_obj_ptr &_Other) : _M_obj_ptr(_Other._M_obj_ptr)
174  {
175  _Init();
176  }
177 
178  // Move constructor
179  _Reference_counted_obj_ptr(_Reference_counted_obj_ptr &&_Other) : _M_obj_ptr(_Other._M_obj_ptr)
180  {
181  _Other._M_obj_ptr = nullptr;
182  // No change to ref-count
183  }
184 
185  // Destructor
186  ~_Reference_counted_obj_ptr()
187  {
188  if (_M_obj_ptr != NULL) {
189  _UnInitialize(_M_obj_ptr);
190  }
191  }
192 
193  // Assignment operator
194  _Reference_counted_obj_ptr& operator=(const _Reference_counted_obj_ptr &_Other)
195  {
196  if (_M_obj_ptr != _Other._M_obj_ptr)
197  {
198  T *oldPtr = _M_obj_ptr;
199  _M_obj_ptr = _Other._M_obj_ptr;
200  _Init();
201 
202  if (oldPtr != NULL) {
203  _UnInitialize(oldPtr);
204  }
205  }
206  return *this;
207  }
208 
209  // Move-assignment operator
210  _Reference_counted_obj_ptr& operator=(_Reference_counted_obj_ptr &&_Other)
211  {
212  if (_M_obj_ptr != _Other._M_obj_ptr)
213  {
214  T *oldPtr = _M_obj_ptr;
215  _M_obj_ptr = _Other._M_obj_ptr;
216  _Other._M_obj_ptr = nullptr;
217  // No change to ref-count of the adopted pointer.
218 
219  if (oldPtr != nullptr)
220  {
221  _UnInitialize(oldPtr);
222  }
223  }
224  return *this;
225  }
226 
227  _Ret_ T* operator->() const
228  {
229  return _M_obj_ptr;
230  }
231 
232  T& operator*() const
233  {
234  return *_M_obj_ptr;
235  }
236 
237  operator T*() const
238  {
239  return _M_obj_ptr;
240  }
241 
242  _Ret_ T* _Get_ptr() const
243  {
244  return _M_obj_ptr;
245  }
246 
247  private:
248  T *_M_obj_ptr;
249 
250  void _Init()
251  {
252  if (_M_obj_ptr == NULL)
253  return;
254 
255  reinterpret_cast<_Reference_counter*>(_M_obj_ptr)->_Add_reference();
256  }
257 
258  static void _UnInitialize(_In_ T *_Obj_ptr)
259  {
260  reinterpret_cast<_Reference_counter*>(_Obj_ptr)->_Remove_reference();
261  }
262  };
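// Illustrative sketch (not part of amprt.h): any type derived from _Reference_counter
// can be managed by _Reference_counted_obj_ptr. Copies add a reference, and when the
// last owner goes away the count drops to zero and _Release() is invoked on the object.
// The type and function below are hypothetical.
class _Example_counted_object : public _Reference_counter
{
public:
    _Example_counted_object() : _M_payload(0) {}
    int _M_payload;
};

inline void _Example_share_ownership()
{
    _Reference_counted_obj_ptr<_Example_counted_object> _First(new _Example_counted_object());
    _Reference_counted_obj_ptr<_Example_counted_object> _Second = _First; // ref count is now 2
    _Second->_M_payload = 42;
} // both owners are destroyed here and the last one releases the object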
263 
264  // Forward declarations
265  class _Trace;
266  class _Amp_runtime_trace;
267  class _Buffer;
268  class _Texture;
269  class _Sampler;
270  class _Ubiquitous_buffer;
271  class _D3D_interop;
272  class _Accelerator_view_impl;
273  class _CPU_accelerator_view_impl;
274  class _D3D_accelerator_view_impl;
275  class _Accelerator_impl;
276  class _Event_impl;
277  class _DPC_runtime_factory;
278  class _View_shape;
279  struct _Buffer_descriptor;
281  struct _DPC_shader_blob;
282  struct _View_info;
283 
284  // The enum specifies the base type for short vector type.
285  enum _Short_vector_base_type_id : unsigned int
286  {
293  _Invalid_type = 0xFFFFFFFF
294  };
295 
296  typedef enum _Short_vector_base_type_id _Texture_base_type_id;
297 
298 } // namespace Concurrency::details
299 
300  typedef details::_Reference_counted_obj_ptr<details::_Accelerator_view_impl> _Accelerator_view_impl_ptr;
301  typedef details::_Reference_counted_obj_ptr<details::_Accelerator_impl> _Accelerator_impl_ptr;
302  typedef details::_Reference_counted_obj_ptr<details::_Buffer> _Buffer_ptr;
303  typedef details::_Reference_counted_obj_ptr<details::_Texture> _Texture_ptr;
304  typedef details::_Reference_counted_obj_ptr<details::_Sampler> _Sampler_ptr;
305  typedef details::_Reference_counted_obj_ptr<details::_Ubiquitous_buffer> _Ubiquitous_buffer_ptr;
306  typedef details::_Reference_counted_obj_ptr<details::_Event_impl> _Event_impl_ptr;
307  typedef details::_Reference_counted_obj_ptr<details::_View_shape> _View_shape_ptr;
308 
309 namespace details
310 {
311  // The _Event class.
312  class _Event
313  {
314  friend class _Buffer;
315  friend class _Texture;
316  friend class accelerator_view;
318 
319  public:
323  _AMPIMP _Event();
324 
328  _AMPIMP ~_Event();
329 
333  _AMPIMP _Event(const _Event & _Other);
334 
338  _AMPIMP _Event & operator=(const _Event & _Other);
339 
347 
354  _AMPIMP bool _Is_finished();
355 
359  _AMPIMP void _Get();
360 
368  _AMPIMP bool _Is_empty() const;
369 
377 
384  _AMPIMP _Event _Add_continuation(const std::function<_Event __cdecl ()> &_Continuation_task);
385 
389  _AMPIMP bool operator==(const _Event &_Other) const;
390 
394  _AMPIMP bool operator!=(const _Event &_Other) const;
395 
396  private:
397 
398  // Private constructor
399  _Event(_In_ _Event_impl* _Impl);
400 
401  _Event_impl_ptr _M_ptr_event_impl;
402  };
403 
404  typedef _Buffer_descriptor *_View_key;
405 
406  _Ret_ _Accelerator_view_impl* _Get_accelerator_view_impl_ptr(const accelerator_view& _Accl_view);
407  _Ret_ _Accelerator_impl* _Get_accelerator_impl_ptr(const accelerator& _Accl);
408  _Event _Get_access_async(const _View_key _Key, accelerator_view _Av, _Access_mode _Mode, _Buffer_ptr &_Buf_ptr);
409  unsigned int _Get_mipmap_levels(const _Texture *_Tex);
410 
411  inline bool _Is_valid_access_mode(_Access_mode _Mode)
412  {
413  if ((_Mode != _Read_access) &&
414  (_Mode != _Write_access) &&
415  (_Mode != _Read_write_access))
416  {
417  return false;
418  }
419 
420  return true;
421  }
422 
423  // Caution: Do not change this structure definition.
424  // This struct is special and is processed by the FE to identify the buffers
425  // used in a parallel_for_each and to setup the _M_data_ptr with the appropriate
426  // buffer ptr value in the device code.
427  typedef struct _Buffer_descriptor
428  {
429  friend _Event _Get_access_async(const _View_key _Key, accelerator_view _Av, _Access_mode _Mode, _Buffer_ptr &_Buf_ptr);
430 
431  // _M_data_ptr points to the raw data underlying the buffer for accessing on host
432  mutable void *_M_data_ptr;
433 
434  private:
435  // _M_buffer_ptr points to a _Ubiquitous_buffer that holds the data in a 1D array.
436  // This is private to ensure that all assignments to this data member
437  // only happen through public functions which properly manage the
438  // ref count of the underlying buffer
439  _Ubiquitous_buffer *_M_buffer_ptr;
440 
441  public:
442  // _M_curr_cpu_access_mode specifies the current access mode of the data on the
443  // cpu accelerator_view specified at the time of registration of this view
444  _Access_mode _M_curr_cpu_access_mode;
445 
446  // _M_type_access_mode specifies the access mode of the overlay type
447  // array_views set it to the appropriate access mode and for arrays it is
448  // always _Is_array_mode.
449  _Access_mode _M_type_access_mode;
450 
451  public:
452  // Public functions
453 
454  // Default constructor
455  _Buffer_descriptor() __GPU
456  : _M_data_ptr(NULL), _M_buffer_ptr(NULL),
457  _M_curr_cpu_access_mode(_No_access), _M_type_access_mode(_Is_array_mode)
458  {
459  }
460 
461  _Buffer_descriptor(_In_ void *_Data_ptr, _In_ _Ubiquitous_buffer *_Buffer_ptr,
462  _Access_mode _Curr_cpu_access_mode, _Access_mode _Type_mode) __GPU
463  : _M_data_ptr(_Data_ptr), _M_buffer_ptr(NULL),
464  _M_curr_cpu_access_mode(_Curr_cpu_access_mode), _M_type_access_mode(_Type_mode)
465  {
466  _Set_buffer_ptr(_Buffer_ptr);
467  }
468 
469  // Destructor
470  ~_Buffer_descriptor() __GPU
471  {
472  _Set_buffer_ptr(NULL);
473  }
474 
475  // Copy constructor
476  _Buffer_descriptor(const _Buffer_descriptor &_Other) __GPU
477  : _M_data_ptr(_Other._M_data_ptr), _M_buffer_ptr(NULL),
478  _M_curr_cpu_access_mode(_Other._M_curr_cpu_access_mode), _M_type_access_mode(_Other._M_type_access_mode)
479  {
480  _Set_buffer_ptr(_Other._M_buffer_ptr);
481  }
482 
483  // Assignment operator
484  _Buffer_descriptor& operator=(const _Buffer_descriptor &_Other) __GPU
485  {
486  if (this != &_Other)
487  {
488  _M_data_ptr = _Other._M_data_ptr;
489  _M_curr_cpu_access_mode = _Other._M_curr_cpu_access_mode;
490  _M_type_access_mode = _Other._M_type_access_mode;
491  _Set_buffer_ptr(_Other._M_buffer_ptr);
492  }
493 
494  return *this;
495  }
496 
497  _Ret_ _Ubiquitous_buffer* _Get_buffer_ptr() const __CPU_ONLY
498  {
499  return _M_buffer_ptr;
500  }
501 
502  void _Set_buffer_ptr(_In_opt_ _Ubiquitous_buffer *_Buffer_ptr) __CPU_ONLY
503  {
504  if (_M_buffer_ptr != _Buffer_ptr)
505  {
506  if (_M_buffer_ptr != NULL) {
507  reinterpret_cast<_Reference_counter*>(_M_buffer_ptr)->_Remove_reference();
508  }
509 
510  _M_buffer_ptr = _Buffer_ptr;
511 
512  if (_M_buffer_ptr != NULL) {
513  reinterpret_cast<_Reference_counter*>(_M_buffer_ptr)->_Add_reference();
514  }
515  }
516  }
517 
518 #if !defined(_CXXAMP)
519  void _Set_buffer_ptr(_In_opt_ _Ubiquitous_buffer *_Buffer_ptr) __GPU_ONLY
520  {
521  // No need to set the buffer ptr on the GPU
522  UNREFERENCED_PARAMETER(_Buffer_ptr);
523  _M_buffer_ptr = NULL;
524  }
525 #endif // _CXXAMP
526 
527  bool _Is_array() const
528  {
529  return (_M_type_access_mode == _Is_array_mode);
530  }
531 
532  _Ret_ _View_key _Get_view_key()
533  {
534  return this;
535  }
536 
537  const _View_key _Get_view_key() const
538  {
539  return ((const _View_key)(this));
540  }
541 
542  _AMPIMP void _Get_CPU_access(_Access_mode _Requested_mode) const;
543 
544  } _Buffer_descriptor;
545 
546  // Caution: Do not change this structure definition.
547  // This struct is special and is processed by the FE to identify the textures
548  // used in a parallel_for_each and to setup the _M_data_ptr with the appropriate
549  // texture ptr value in the device code.
550  typedef struct _Texture_descriptor
551  {
552  // _M_data_ptr points to the raw data underlying the texture
553  mutable IUnknown *_M_data_ptr;
554 
555  private:
556  // _M_texture_ptr points to a _Texture that holds the data
557  // This is private to ensure that all assignments to this data member
558  // only happen through public functions which properly manage the
559  // ref count of the underlying texture
560  _Texture *_M_texture_ptr;
561 
562  // The index of the most detailed (largest in size) mipmap level for the texture (or texture view)
563  // This value is always zero for the texture and might be non-zero for the texture views
564  unsigned int _M_most_detailed_mipmap_level;
565 
566  // Number of accessible mipmap levels for the texture (or texture view),
567  // e.g. if the texture has 3 mipmap levels ([0, 1, 2]),
568  // then read-only texture view with most detailed mipmap level equal to 1, can have 1 or 2 mipmap levels ([1] or [1, 2]).
569  // Further texture_views created on top of the texture view defined above can only narrow down the range of accessible mipmap levels.
570  unsigned int _M_view_mipmap_levels;
571 
572  public:
573  // Public functions
574 
575  // Default constructor
576  _Texture_descriptor() __GPU
577  : _M_data_ptr(NULL), _M_texture_ptr(NULL), _M_most_detailed_mipmap_level(0), _M_view_mipmap_levels(0)
578  {
579  // Enables move constructor
580  }
581 
582  // Constructor for the texture
583  _Texture_descriptor(unsigned int _Most_detailed_mipmap_level, unsigned int _View_mipmap_levels) __GPU
584  : _M_data_ptr(NULL), _M_texture_ptr(NULL), _M_most_detailed_mipmap_level(_Most_detailed_mipmap_level), _M_view_mipmap_levels(_View_mipmap_levels)
585  {
586  }
587 
588  // Constructor for the interop texture
589  _Texture_descriptor(_In_ _Texture * _Texture_ptr) : _M_data_ptr(NULL), _M_texture_ptr(NULL), _M_most_detailed_mipmap_level(0) __CPU_ONLY
590  {
591  _Set_texture_ptr(_Texture_ptr);
592 
593  // Adopt number of mipmap levels from underlying texture object
594  _M_view_mipmap_levels = _Get_mipmap_levels(_M_texture_ptr);
595  }
596 
597  // Destructor
598  ~_Texture_descriptor() __GPU
599  {
600  _Set_texture_ptr(NULL);
601  }
602 
603  // Copy constructor
604  _Texture_descriptor(const _Texture_descriptor &_Other) __GPU
605  : _M_data_ptr(_Other._M_data_ptr), _M_texture_ptr(NULL),
606  _M_most_detailed_mipmap_level(_Other._M_most_detailed_mipmap_level), _M_view_mipmap_levels(_Other._M_view_mipmap_levels)
607  {
608  _Set_texture_ptr(_Other._M_texture_ptr);
609  }
610 
611  // Copy constructor with ability to redefine mipmap information
612  _Texture_descriptor(const _Texture_descriptor &_Other, unsigned int _Most_detailed_mipmap_level, unsigned int _View_mipmap_levels) __GPU
613  : _M_data_ptr(_Other._M_data_ptr), _M_texture_ptr(NULL),
614  _M_most_detailed_mipmap_level(_Most_detailed_mipmap_level), _M_view_mipmap_levels(_View_mipmap_levels)
615  {
616  _Set_texture_ptr(_Other._M_texture_ptr);
617  }
618 
619  // Assignment operator
620  _Texture_descriptor& operator=(const _Texture_descriptor &_Other) __GPU
621  {
622  if (this != &_Other)
623  {
624  _M_data_ptr = _Other._M_data_ptr;
625  _Set_texture_ptr(_Other._M_texture_ptr);
626  _M_most_detailed_mipmap_level = _Other._M_most_detailed_mipmap_level;
627  _M_view_mipmap_levels = _Other._M_view_mipmap_levels;
628  }
629 
630  return *this;
631  }
632 
633  // Move constructor
634  _Texture_descriptor(_Texture_descriptor &&_Other) __CPU_ONLY
635  {
636  *this = std::move(_Other);
637  }
638 
639  bool operator==(const _Texture_descriptor &_Other) const __GPU
640  {
641  return _M_texture_ptr == _Other._M_texture_ptr
642  && _M_data_ptr == _Other._M_data_ptr
643  && _M_most_detailed_mipmap_level == _Other._M_most_detailed_mipmap_level
644  && _M_view_mipmap_levels == _Other._M_view_mipmap_levels;
645  }
646 
647  _Ret_ _Texture* _Get_texture_ptr() const __CPU_ONLY
648  {
649  _ASSERTE(_M_texture_ptr);
650  return _M_texture_ptr;
651  }
652 
653  unsigned int _Get_most_detailed_mipmap_level() const __GPU
654  {
655  return _M_most_detailed_mipmap_level;
656  }
657 
658  unsigned int _Get_view_mipmap_levels() const __GPU
659  {
660  return _M_view_mipmap_levels;
661  }
662 
663  void _Set_view_mipmap_levels(unsigned int _View_mipmap_levels) __CPU_ONLY
664  {
665  _M_view_mipmap_levels = _View_mipmap_levels;
666  }
667 
668  void _Set_texture_ptr(_In_opt_ _Texture *_Texture_ptr) __CPU_ONLY
669  {
670  if (_M_texture_ptr != _Texture_ptr)
671  {
672  if (_M_texture_ptr != NULL) {
673  reinterpret_cast<_Reference_counter*>(_M_texture_ptr)->_Remove_reference();
674  }
675 
676  _M_texture_ptr = _Texture_ptr;
677 
678  if (_M_texture_ptr != NULL) {
679  reinterpret_cast<_Reference_counter*>(_M_texture_ptr)->_Add_reference();
680  }
681  }
682  }
683 
684 #if !defined(_CXXAMP)
685  void _Set_texture_ptr(_In_opt_ _Texture *_Texture_ptr) __GPU_ONLY
686  {
687  // No need to set the texture ptr on the GPU
688  UNREFERENCED_PARAMETER(_Texture_ptr);
689  _M_texture_ptr = NULL;
690  }
691 #endif // _CXXAMP
692 
693  // This helper function is used to determine aliasing and copy violations
694  bool _Are_mipmap_levels_overlapping(const _Texture_descriptor *_Other) const __CPU_ONLY
695  {
696  _ASSERTE(_Other);
697 
698  if (this->_Get_texture_ptr() != _Other->_Get_texture_ptr())
699  {
700  return false;
701  }
702 
703  return !((_M_most_detailed_mipmap_level < _Other->_M_most_detailed_mipmap_level) ? ((_M_most_detailed_mipmap_level + _M_view_mipmap_levels - 1) < _Other->_M_most_detailed_mipmap_level)
704  : ((_Other->_M_most_detailed_mipmap_level + _Other->_M_view_mipmap_levels - 1) < _M_most_detailed_mipmap_level));
705  }
706 
707  } _Texture_descriptor;
708 
709  // Caution: Do not change this structure definition.
710  // This struct is special and is processed by the FE to identify the samplers
711  // used in a parallel_for_each.
712  typedef struct _Sampler_descriptor
713  {
714  // _M_data_ptr points to the sampler on accelerator
715  mutable void *_M_data_ptr;
716 
717  private:
718  // _M_sampler_ptr points to a _Sampler that holds the underlying sampler
719  // representation. This is private to ensure that all assignments to this data member
720  // only happen through public functions which properly manage the
721  // ref count of the underlying _Sampler object.
722  _Sampler *_M_sampler_ptr;
723 
724  public:
725  // Public functions
726 
727  // Default constructor
728  _Sampler_descriptor() __GPU
729  : _M_data_ptr(NULL), _M_sampler_ptr(NULL)
730  {
731  }
732 
734  _Sampler_descriptor(_In_ _Sampler * _Sampler_ptr) __GPU
735  {
736  _Set_sampler_ptr(_Sampler_ptr);
737  }
738 
739  // Destructor
740  ~_Sampler_descriptor() __GPU
741  {
742  _Set_sampler_ptr(NULL);
743  }
744 
745  // Copy constructor
746  _Sampler_descriptor(const _Sampler_descriptor &_Other) __GPU
747  : _M_data_ptr(_Other._M_data_ptr), _M_sampler_ptr(NULL)
748  {
749  _Set_sampler_ptr(_Other._M_sampler_ptr);
750  }
751 
752  // Assignment operator
753  _Sampler_descriptor& operator=(const _Sampler_descriptor &_Other) __GPU
754  {
755  if (this != &_Other)
756  {
757  _M_data_ptr = _Other._M_data_ptr;
758  _Set_sampler_ptr(_Other._M_sampler_ptr);
759  }
760 
761  return *this;
762  }
763 
764  // Move constructor
765  _Sampler_descriptor(_Sampler_descriptor &&_Other) __CPU_ONLY
766  {
767  *this = std::move(_Other);
768  }
769 
770  bool operator==(const _Sampler_descriptor &_Other) const __GPU
771  {
772  return _M_sampler_ptr == _Other._M_sampler_ptr && _M_data_ptr == _Other._M_data_ptr;
773  }
774 
775  _Ret_ _Sampler* _Get_sampler_ptr() const __CPU_ONLY
776  {
777  return _M_sampler_ptr;
778  }
779 
780  void _Set_sampler_ptr(_In_opt_ _Sampler *_Sampler_ptr) __CPU_ONLY
781  {
782  if (_M_sampler_ptr != _Sampler_ptr)
783  {
784  if (_M_sampler_ptr != NULL) {
785  reinterpret_cast<_Reference_counter*>(_M_sampler_ptr)->_Remove_reference();
786  }
787 
788  _M_sampler_ptr = _Sampler_ptr;
789 
790  if (_M_sampler_ptr != NULL) {
791  reinterpret_cast<_Reference_counter*>(_M_sampler_ptr)->_Add_reference();
792  }
793  }
794  }
795 
796 #if !defined(_CXXAMP)
797  void _Set_sampler_ptr(_In_opt_ _Sampler *_Sampler_ptr) __GPU_ONLY
798  {
799  // No need to set the sampler ptr on the GPU
800  UNREFERENCED_PARAMETER(_Sampler_ptr);
801  _M_sampler_ptr = NULL;
802  }
803 #endif // _CXXAMP
804 
805  } _Sampler_descriptor;
806 
807 } // namespace Concurrency::details
808 
809 // Forward declaration
810 class accelerator;
811 
812 namespace details
813 {
814  _AMPIMP size_t __cdecl _Get_num_devices();
815  _AMPIMP _Ret_ _Accelerator_impl_ptr * __cdecl _Get_devices();
816  _AMPIMP accelerator __cdecl _Select_default_accelerator();
817  _AMPIMP bool __cdecl _Set_default_accelerator(_Accelerator_impl_ptr _Accl);
818  _AMPIMP bool __cdecl _Is_D3D_accelerator_view(const accelerator_view& _Av);
819  _AMPIMP void __cdecl _Register_async_event(const _Event &_Ev, const std::shared_future<void> &_Shared_future);
820  _AMPIMP _Access_mode __cdecl _Get_recommended_buffer_host_access_mode(const accelerator_view &_Av);
821 }
822 
826  enum queuing_mode {
827  queuing_mode_immediate,
828  queuing_mode_automatic
829  };
830 
835 class runtime_exception : public std::exception
836 {
837 public:
847  _AMPIMP runtime_exception(const char * _Message, HRESULT _Hresult) throw();
848 
855  _AMPIMP explicit runtime_exception(HRESULT _Hresult) throw();
856 
863  _AMPIMP runtime_exception(const runtime_exception &_Other) throw();
864 
871  _AMPIMP runtime_exception &operator=(const runtime_exception &_Other) throw();
872 
876  _AMPIMP virtual ~runtime_exception() throw();
877 
884  _AMPIMP HRESULT get_error_code() const throw();
885 
886 private:
887  HRESULT _M_error_code;
888 }; // class runtime_exception
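// Illustrative sketch (not part of amprt.h): callers can catch runtime_exception to
// retrieve the HRESULT recorded by the runtime. The helper below is hypothetical.
inline HRESULT _Example_run_and_report(const std::function<void()> &_Work)
{
    try
    {
        _Work();
        return S_OK;
    }
    catch (const runtime_exception &_Ex)
    {
        return _Ex.get_error_code();
    }
}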
889 
894  class out_of_memory : public runtime_exception
895  {
896 public:
903  _AMPIMP explicit out_of_memory(const char * _Message) throw();
904 
908  _AMPIMP out_of_memory () throw();
909 }; // class out_of_memory
910 
911 namespace direct3d
912 {
922  _AMPIMP _Ret_ IUnknown * __cdecl get_device(const accelerator_view &_Av);
923 
937  _AMPIMP accelerator_view __cdecl create_accelerator_view(_In_ IUnknown *_D3D_device, queuing_mode qmode = queuing_mode_automatic);
938 
961  _AMPIMP accelerator_view __cdecl create_accelerator_view(accelerator& _Accelerator, bool _Disable_timeout, queuing_mode _Qmode = queuing_mode_automatic);
962 
974  _AMPIMP bool __cdecl is_timeout_disabled(const accelerator_view& _Accelerator_view);
975 
990  _AMPIMP void __cdecl d3d_access_lock(accelerator_view &_Av);
991 
1001  _AMPIMP bool __cdecl d3d_access_try_lock(accelerator_view &_Av);
1002 
1010  _AMPIMP void __cdecl d3d_access_unlock(accelerator_view &_Av);
1011 
1016  struct adopt_d3d_access_lock_t {};
1017 
1021  class scoped_d3d_access_lock
1022  {
1023  public:
1032  _AMPIMP explicit scoped_d3d_access_lock(accelerator_view &_Av);
1033 
1046  _AMPIMP scoped_d3d_access_lock(accelerator_view &_Av, adopt_d3d_access_lock_t);
1047 
1051  _AMPIMP ~scoped_d3d_access_lock();
1052 
1060  _AMPIMP scoped_d3d_access_lock(scoped_d3d_access_lock &&_Other);
1061 
1073  _AMPIMP scoped_d3d_access_lock& operator=(scoped_d3d_access_lock &&_Other);
1074 
1075  private:
1076  // No copy constructor
1077  scoped_d3d_access_lock(const scoped_d3d_access_lock &_Other);
1078 
1079  // No assignment operator
1080  scoped_d3d_access_lock & operator=(const scoped_d3d_access_lock &_Other);
1081 
1082  _Accelerator_view_impl_ptr _M_impl;
1083  };
1084 } // namespace direct3d
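// Illustrative sketch (not part of amprt.h): interop code that submits raw Direct3D
// work on a device shared with C++ AMP can bracket that work with the access lock
// declared above; the scoped_d3d_access_lock RAII wrapper achieves the same with
// automatic unlock. The helper below is hypothetical.
inline void _Example_locked_interop_work(accelerator_view &_Av, const std::function<void()> &_D3d_work)
{
    direct3d::d3d_access_lock(_Av);   // blocks until the lock is acquired
    _D3d_work();                      // raw Direct3D work is safe while the lock is held
    direct3d::d3d_access_unlock(_Av);
}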
1085 
1089  class accelerator
1090  {
1091  friend class accelerator_view;
1092 
1094 
1096 
1097  _AMPIMP friend accelerator_view __cdecl direct3d::create_accelerator_view(accelerator& _Accelerator, bool _Disable_timeout, queuing_mode _Qmode /* = queuing_mode_automatic */);
1098 
1099  friend _Ret_ details::_Accelerator_impl* details::_Get_accelerator_impl_ptr(const accelerator& _Accl);
1100 
1101 public:
1102 
1106  _AMPIMP static const wchar_t default_accelerator[];
1107 
1111  _AMPIMP static const wchar_t cpu_accelerator[];
1112 
1116  _AMPIMP static const wchar_t direct3d_warp[];
1117 
1121  _AMPIMP static const wchar_t direct3d_ref[];
1122 
1126  _AMPIMP accelerator();
1127 
1132  explicit accelerator(const std::wstring &_Device_path) : _M_impl(NULL)
1133  {
1134  _Init(_Device_path.c_str());
1135  }
1136 
1141 
1145  _AMPIMP accelerator(const accelerator &_Other);
1146 
1150  _AMPIMP accelerator &operator=(const accelerator &_Other);
1151 
1158  static inline std::vector<accelerator> get_all()
1159  {
1160  std::vector<accelerator> _AcceleratorVector;
1161  size_t _NumDevices = details::_Get_num_devices();
1162  for (size_t _I = 0; (_I < _NumDevices); ++_I)
1163  {
1164  _AcceleratorVector.push_back(details::_Get_devices()[_I]);
1165  }
1166 
1167  return _AcceleratorVector;
1168  }
1169 
1181  static inline bool set_default(const std::wstring& _Path)
1182  {
1183  accelerator _Accl(_Path);
1184  return details::_Set_default_accelerator(_Accl._M_impl);
1185  }
1186 
1196 
1200  std::wstring get_device_path() const
1201  {
1202  return _Get_device_path();
1203  }
1204 
1205  __declspec(property(get=get_device_path)) std::wstring device_path;
1206 
1210  _AMPIMP unsigned int get_version() const;
1211  __declspec(property(get=get_version)) unsigned int version; // hiword=major, loword=minor
1212 
1216  std::wstring get_description() const
1217  {
1218  return _Get_description();
1219  }
1220 
1221  __declspec(property(get=get_description)) std::wstring description;
1222 
1227  _AMPIMP bool get_is_debug() const;
1228  __declspec(property(get=get_is_debug)) bool is_debug;
1229 
1234  _AMPIMP bool get_is_emulated() const;
1235  __declspec(property(get=get_is_emulated)) bool is_emulated;
1236 
1241  _AMPIMP bool get_has_display() const;
1242  __declspec(property(get=get_has_display)) bool has_display;
1243 
1250  _AMPIMP bool get_supports_double_precision() const;
1251  __declspec(property(get=get_supports_double_precision)) bool supports_double_precision;
1252 
1259  _AMPIMP bool get_supports_limited_double_precision() const;
1260  __declspec(property(get=get_supports_limited_double_precision)) bool supports_limited_double_precision;
1261 
1266  _AMPIMP bool get_supports_cpu_shared_memory() const;
1267  __declspec(property(get=get_supports_cpu_shared_memory)) bool supports_cpu_shared_memory;
1268 
1272  _AMPIMP accelerator_view get_default_view() const;
1273  __declspec(property(get=get_default_view)) accelerator_view default_view;
1274 
1278  _AMPIMP size_t get_dedicated_memory() const;
1279  __declspec(property(get=get_dedicated_memory)) size_t dedicated_memory;
1280 
1284  _AMPIMP access_type get_default_cpu_access_type() const;
1285  __declspec(property(get=get_default_cpu_access_type)) access_type default_cpu_access_type;
1286 
1304  _AMPIMP bool set_default_cpu_access_type(access_type _Default_cpu_access_type);
1305 
1311  _AMPIMP accelerator_view create_view(queuing_mode qmode = queuing_mode_automatic);
1312 
1316  _AMPIMP bool operator==(const accelerator &_Other) const;
1317 
1321  _AMPIMP bool operator!=(const accelerator &_Other) const;
1322 
1323 private:
1324 
1325  // Private constructor
1326  _AMPIMP accelerator(_Accelerator_impl_ptr _Impl);
1327 
1328  // Private helper methods
1329  _AMPIMP const wchar_t *_Get_device_path() const;
1330  _AMPIMP const wchar_t *_Get_description() const;
1331 
1332  _AMPIMP void _Init(const wchar_t *_Path);
1333 
1334 private:
1335 
1336  _Accelerator_impl_ptr _M_impl;
1337 };
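// Illustrative sketch (not part of amprt.h): typical host code enumerates the
// available accelerators and picks a hardware (non-emulated) device, falling back
// to the runtime's default selection. The helper below is hypothetical.
inline accelerator _Example_pick_hardware_accelerator()
{
    std::vector<accelerator> _Accls = accelerator::get_all();
    for (size_t _I = 0; _I < _Accls.size(); ++_I)
    {
        if (!_Accls[_I].get_is_emulated() && _Accls[_I].get_dedicated_memory() > 0)
        {
            accelerator::set_default(_Accls[_I].get_device_path());
            return _Accls[_I];
        }
    }
    return accelerator(); // default-constructed accelerator == runtime default
}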
1338 
1342  class completion_future
1343  {
1345 public:
1346 
1350  completion_future()
1351  {
1352  }
1353 
1357  completion_future(const completion_future& _Other)
1358  : _M_shared_future(_Other._M_shared_future),
1359  _M_task(_Other._M_task)
1360  {
1361  }
1362 
1366  completion_future(completion_future&& _Other)
1367  : _M_shared_future(std::move(_Other._M_shared_future)),
1368  _M_task(std::move(_Other._M_task))
1369  {
1370  }
1371 
1375  ~completion_future()
1376  {
1377  }
1378 
1382  completion_future& operator=(const completion_future& _Other)
1383  {
1384  if (this != &_Other) {
1385  _M_shared_future = _Other._M_shared_future;
1386  _M_task = _Other._M_task;
1387  }
1388 
1389  return (*this);
1390  }
1391 
1395  completion_future& operator=(completion_future&& _Other)
1396  {
1397  if (this != &_Other) {
1398  _M_shared_future = std::move(_Other._M_shared_future);
1399  _M_task = std::move(_Other._M_task);
1400  }
1401 
1402  return (*this);
1403  }
1404 
1410  void get() const
1411  {
1412  _M_shared_future.get();
1413  }
1414 
1423  bool valid() const
1424  {
1425  return _M_shared_future.valid();
1426  }
1427 
1431  void wait() const
1432  {
1433  _M_shared_future.wait();
1434  }
1435 
1445  template <class _Rep, class _Period>
1446  std::future_status wait_for(const std::chrono::duration<_Rep, _Period>& _Rel_time) const
1447  {
1448  return _M_shared_future.wait_for(_Rel_time);
1449  }
1450 
1460  template <class _Clock, class _Duration>
1461  std::future_status wait_until(const std::chrono::time_point<_Clock, _Duration>& _Abs_time) const
1462  {
1463  return _M_shared_future.wait_until(_Abs_time);
1464  }
1465 
1474  operator std::shared_future<void>() const
1475  {
1476  return _M_shared_future;
1477  }
1478 
1483  template <typename _Functor>
1484  void then(const _Functor &_Func) const
1485  {
1486  this->to_task().then(_Func);
1487  }
1488 
1497  concurrency::task<void> to_task() const
1498  {
1499  return _M_task;
1500  }
1501 
1502 private:
1503 
1504  // Private constructor
1505  completion_future(const std::shared_future<void> &_Shared_future,
1506  const concurrency::task<void>& _Task)
1507  : _M_shared_future(_Shared_future), _M_task(_Task)
1508  {
1509  }
1510 
1511  std::shared_future<void> _M_shared_future;
1512  concurrency::task<void> _M_task;
1513 };
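// Illustrative sketch (not part of amprt.h): completion_future wraps a
// std::shared_future<void>, so waiting on it follows the usual future rules.
// The helper below is hypothetical.
inline bool _Example_wait_if_valid(const completion_future &_Done)
{
    if (!_Done.valid())
        return false;     // no asynchronous operation is associated with it
    _Done.wait();         // block until the associated operation completes
    return true;
}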
1514 
1518  class accelerator_view
1519  {
1520  friend class accelerator;
1521  friend class details::_Buffer;
1522  friend class details::_Texture;
1523  friend class details::_Sampler;
1526  friend class details::_D3D_accelerator_view_impl;
1527  friend class details::_CPU_accelerator_view_impl;
1529 
1530  _AMPIMP friend _Ret_ IUnknown * __cdecl direct3d::get_device(const accelerator_view &_Av);
1531 
1532  _AMPIMP friend accelerator_view __cdecl direct3d::create_accelerator_view(_In_ IUnknown *_D3D_device, queuing_mode qmode /* = queuing_mode_automatic */);
1533 
1534  _AMPIMP friend accelerator_view __cdecl direct3d::create_accelerator_view(accelerator& _Accelerator, bool _Disable_timeout, queuing_mode _Qmode /* = queuing_mode_automatic */);
1535 
1536  _AMPIMP friend bool __cdecl direct3d::is_timeout_disabled(const accelerator_view& _Accelerator_view);
1537 
1538  friend _Ret_ details::_Accelerator_view_impl* details::_Get_accelerator_view_impl_ptr(const accelerator_view& _Accl_view);
1539 
1540 public:
1541 
1545  _AMPIMP ~accelerator_view();
1546 
1550  _AMPIMP accelerator_view(const accelerator_view &_Other);
1551 
1555  _AMPIMP accelerator_view &operator=(const accelerator_view &_Other);
1556 
1560  _AMPIMP accelerator get_accelerator() const;
1561  __declspec(property(get=get_accelerator)) Concurrency::accelerator accelerator;
1562 
1567  _AMPIMP bool get_is_debug() const;
1568  __declspec(property(get=get_is_debug)) bool is_debug;
1569 
1573  _AMPIMP unsigned int get_version() const;
1574  __declspec(property(get=get_version)) unsigned int version; // hiword=major, loword=minor
1575 
1579  _AMPIMP queuing_mode get_queuing_mode() const;
1580  __declspec(property(get=get_queuing_mode)) Concurrency::queuing_mode queuing_mode;
1581 
1587  _AMPIMP bool get_is_auto_selection() const;
1588  __declspec(property(get=get_is_auto_selection)) bool is_auto_selection;
1589 
1593  _AMPIMP bool operator==(const accelerator_view &_Other) const;
1594 
1598  _AMPIMP bool operator!=(const accelerator_view &_Other) const;
1599 
1603  _AMPIMP void wait();
1604 
1609  _AMPIMP void flush();
1610 
1614  _AMPIMP concurrency::completion_future create_marker();
1615 
1616 private:
1617 
1618  // No default constructor
1619  accelerator_view();
1620 
1621  // Private constructor
1622  _AMPIMP accelerator_view(_Accelerator_view_impl_ptr _Impl, bool _Auto_selection = false);
1623 
1624 private:
1625 
1626  _Accelerator_view_impl_ptr _M_impl;
1627  bool _M_auto_selection;
1628 };
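// Illustrative sketch (not part of amprt.h): a marker is a lightweight way to find
// out when all commands submitted to an accelerator_view so far have completed.
// The helper below is hypothetical.
inline void _Example_drain_view(accelerator_view &_Av)
{
    completion_future _Marker = _Av.create_marker(); // completes once pending work is done
    _Av.flush();                                     // ensure the queued commands are submitted
    _Marker.wait();                                  // similar in effect to _Av.wait()
}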
1629 
1630 namespace details
1631 {
1632  inline _Ret_ _Accelerator_view_impl* _Get_accelerator_view_impl_ptr(const accelerator_view& _Accl_view)
1633  {
1634  return _Accl_view._M_impl;
1635  }
1636 
1637  inline _Ret_ _Accelerator_impl* _Get_accelerator_impl_ptr(const accelerator& _Accl)
1638  {
1639  return _Accl._M_impl;
1640  }
1641 
1642  // Type defining a hasher for accelerator_view objects
1643  // for use with std::unordered_set and std::unordered_map
1644  class _Accelerator_view_hasher
1645  {
1646  public:
1647  size_t operator()(const accelerator_view &_Accl_view) const
1648  {
1649  std::hash<_Accelerator_view_impl*> _HashFunctor;
1650  return _HashFunctor(_Accl_view._M_impl._Get_ptr());
1651  }
1652  };
1653 
1654  typedef std::unordered_set<accelerator_view, _Accelerator_view_hasher> _Accelerator_view_unordered_set;
1655 
1656  // Describes the N dimensional shape of a view in a buffer
1657  class _View_shape : public _Reference_counter
1658  {
1659  public:
1660 
1661  _AMPIMP static _Ret_ _View_shape* __cdecl _Create_view_shape(unsigned int _Rank, unsigned int _Linear_offset,
1662  const unsigned int *_Base_extent, const unsigned int *_View_offset,
1663  const unsigned int *_View_extent, const bool *_Projection_info = NULL);
1664 
1665  _AMPIMP _Ret_ _View_shape* _Get_reduced_shape_for_copy();
1666 
1667  inline unsigned int _Get_rank() const
1668  {
1669  return _M_rank;
1670  }
1671 
1672  inline unsigned int _Get_linear_offset() const
1673  {
1674  return _M_linear_offset;
1675  }
1676 
1677  inline const unsigned int *_Get_base_extent() const
1678  {
1679  return _M_base_extent;
1680  }
1681 
1682  inline const unsigned int *_Get_view_offset() const
1683  {
1684  return _M_view_offset;
1685  }
1686  inline const unsigned int *_Get_view_extent() const
1687  {
1688  return _M_view_extent;
1689  }
1690 
1691  inline const bool *_Get_projection_info() const
1692  {
1693  return _M_projection_info;
1694  }
1695 
1696  inline bool _Is_projection() const
1697  {
1698  return _M_projection_info[0];
1699  }
1700 
1701  inline bool _Is_valid(size_t _Buffer_size) const
1702  {
1703  // The end point of the base shape should not be greater than the size of the buffer
1704  size_t endLinearOffset = _M_linear_offset + _Get_extent_size(_M_rank, _M_base_extent);
1705  if (endLinearOffset > _Buffer_size) {
1706  return false;
1707  }
1708 
1709  return _Is_valid();
1710  }
1711 
1712  inline unsigned int _Get_view_size() const
1713  {
1714  return _Get_extent_size(_M_rank, _M_view_extent);
1715  }
1716 
1717  inline unsigned int _Get_view_linear_offset() const
1718  {
1719  return _Get_linear_offset(_M_view_offset);
1720  }
1721 
1722  static inline bool
1723  _Compare_extent_with_elem_size(unsigned int _Rank, const unsigned int *_Extent1, size_t _Elem_size1, const unsigned int *_Extent2, size_t _Elem_size2)
1724  {
1725  _ASSERTE((_Rank >= 1) && (_Extent1 != NULL)&& (_Extent2 != NULL));
1726 
1727  // The extents should match accounting for the element sizes of the respective buffers
1728  if ((_Extent1[_Rank - 1] * _Elem_size1) != (_Extent2[_Rank - 1] * _Elem_size2))
1729  {
1730  return false;
1731  }
1732 
1733  // Now compare the extent in all but the least significant dimension
1734  if ((_Rank > 1) && !_Compare_extent(_Rank - 1, _Extent1, _Extent2))
1735  {
1736  return false;
1737  }
1738 
1739  return true;
1740  }
1741 
1742 
1743  static inline bool
1744  _Compare_extent(unsigned int _Rank, const unsigned int *_Extent1, const unsigned int *_Extent2)
1745  {
1746  for (size_t _I = 0; _I < _Rank; ++_I) {
1747  if (_Extent1[_I] != _Extent2[_I]) {
1748  return false;
1749  }
1750  }
1751 
1752  return true;
1753  }
1754 
1755  inline bool _Is_view_linear(unsigned int &_Linear_offset, unsigned int &_Linear_size) const
1756  {
1757  // The effective rank for the purpose of determining linearity
1758  // depends on the highest dimension in which the extent is not 1
1759  unsigned int _First_dim_with_non_unit_extent = 0;
1760  while ((_First_dim_with_non_unit_extent < _M_rank) && (_M_view_extent[_First_dim_with_non_unit_extent] == 1)) {
1761  _First_dim_with_non_unit_extent++;
1762  }
1763 
1764  unsigned int _Effective_rank = (_M_rank - _First_dim_with_non_unit_extent);
1765 
1766  // It is linear if the effective rank is <= 1 or the base extent
1767  // and view extent are the same in all but the highest dimension with
1768  // non-unit extent
1769  if ((_Effective_rank <= 1) ||
1770  (_Compare_extent(_Effective_rank - 1, &_M_base_extent[_First_dim_with_non_unit_extent + 1], &_M_view_extent[_First_dim_with_non_unit_extent + 1])))
1771  {
1772  _Linear_offset = _Get_view_linear_offset();
1773  _Linear_size = _Get_view_size();
1774  return true;
1775  }
1776 
1777  return false;
1778  }
1779 
1780  inline bool _Overlaps(const _View_shape* _Other) const
1781  {
1782  if (_Compare_base_shape(_Other))
1783  {
1784  // If the base shapes are identical we will do the N-dimensional
1785  // bounding box overlap test
1786 
1787  for (size_t _I = 0; _I < _M_rank; ++_I)
1788  {
1789  if (!_Intervals_overlap(_M_view_offset[_I], _M_view_offset[_I] + _M_view_extent[_I] - 1,
1790  _Other->_M_view_offset[_I], _Other->_M_view_offset[_I] + _Other->_M_view_extent[_I] - 1))
1791  {
1792  return false;
1793  }
1794  }
1795 
1796  return true;
1797  }
1798  else
1799  {
1800  // The base shapes are different. Check based on linear intervals
1801  size_t firstStart = _Get_view_linear_offset();
1802  size_t firstEnd = firstStart + _Get_view_size() - 1;
1803 
1804  size_t secondStart = _Other->_Get_view_linear_offset();
1805  size_t secondEnd = secondStart + _Other->_Get_view_size() - 1;
1806 
1807  return _Intervals_overlap(firstStart, firstEnd, secondStart, secondEnd);
1808  }
1809  }
1810 
1811  inline bool _Subsumes(const _View_shape* _Other) const
1812  {
1813  // Subsumption test can only be done for shapes that have the same base shape or
1814  // when both have a rank of 1
1815  if ((_M_rank == 1) && (_Other->_Get_rank() == 1))
1816  {
1817  size_t thisStart = _Get_view_linear_offset();
1818  size_t thisEnd = thisStart + _Get_view_size() - 1;
1819 
1820  size_t otherStart = _Other->_Get_view_linear_offset();
1821  size_t otherEnd = otherStart + _Other->_Get_view_size() - 1;
1822 
1823  return ((otherStart >= thisStart) && (otherEnd <= thisEnd));
1824  }
1825 
1826  if (!_Compare_base_shape(_Other)) {
1827  return false;
1828  }
1829 
1830  if (!_Contains(_Other->_Get_view_offset())) {
1831  return false;
1832  }
1833 
1834  std::vector<unsigned int> otherEndPointIndex(_M_rank);
1835  for (size_t _I = 0; _I < _M_rank; ++_I) {
1836  otherEndPointIndex[_I] = _Other->_Get_view_offset()[_I] + _Other->_Get_view_extent()[_I] - 1;
1837  }
1838 
1839  return _Contains(otherEndPointIndex.data());
1840  }
1841 
1842  private:
1843  // Private constructor to force construction through the _Create_view_shape method
1844  _View_shape(unsigned int _Rank, unsigned int _Linear_offset,
1845  const unsigned int *_Base_extent, const unsigned int *_View_offset,
1846  const unsigned int *_View_extent, const bool *_Projection_info);
1847 
1848  virtual ~_View_shape();
1849 
1850  // No default constructor or copy/assignment
1851  _View_shape();
1852  _View_shape(const _View_shape &_Other);
1853  _View_shape(_View_shape &&_Other);
1854  _View_shape& operator=(const _View_shape &_Other);
1855  _View_shape& operator=(_View_shape &&_Other);
1856 
1857  // Helper methods
1858  static bool _Intervals_overlap(size_t _First_start, size_t _First_end,
1859  size_t _Second_start, size_t _Second_end)
1860  {
1861  // Order the intervals by their start points
1862  if (_First_start > _Second_start) {
1863  size_t temp = _First_start;
1864  _First_start = _Second_start;
1865  _Second_start = temp;
1866 
1867  temp = _First_end;
1868  _First_end = _Second_end;
1869  _Second_end = temp;
1870  }
1871 
1872  // The start of the second one must be within the bounds of the first one
1873  return (_Second_start <= _First_end);
1874  }
1875 
1876  static unsigned int _Get_extent_size(unsigned int _Rank, const unsigned int *_Extent)
1877  {
1878  unsigned int totalExtent = 1;
1879  for (size_t _I = 0; _I < _Rank; ++_I) {
1880  totalExtent *= _Extent[_I];
1881  }
1882 
1883  return totalExtent;
1884  }
1885 
1886  inline bool _Is_valid() const
1887  {
1888  if (_M_rank == 0) {
1889  return false;
1890  }
1891 
1892  // Ensure the _M_view_offset + _M_view_extent is within the bounds of _M_base_extent
1893  size_t viewSize = 1;
1894 
1895  for (size_t _I = 0; _I < _M_rank; ++_I)
1896  {
1897  viewSize *= _M_view_extent[_I];
1898  if ((_M_view_offset[_I] + _M_view_extent[_I]) > _M_base_extent[_I]) {
1899  return false;
1900  }
1901  }
1902 
1903  if (viewSize == 0) {
1904  return false;
1905  }
1906 
1907  return true;
1908  }
1909 
1910  inline bool _Compare_base_shape(const _View_shape* _Other) const
1911  {
1912  return ((_M_rank == _Other->_M_rank) &&
1913  (_M_linear_offset == _Other->_M_linear_offset) &&
1914  _Compare_extent(_M_rank, _M_base_extent, _Other->_M_base_extent));
1915  }
1916 
1917  // Checks if the element at the specified index
1918  // is contained within this view shape
1919  // Assumes the rank of the index is same as the
1920  // rank of this view's shape
1921  inline bool _Contains(const unsigned int* _Element_index) const
1922  {
1923  for (size_t _I = 0; _I < _M_rank; ++_I)
1924  {
1925  if ((_Element_index[_I] < _M_view_offset[_I]) ||
1926  (_Element_index[_I] >= (_M_view_offset[_I] + _M_view_extent[_I])))
1927  {
1928  return false;
1929  }
1930  }
1931 
1932  return true;
1933  }
1934 
1935  inline unsigned int _Get_linear_offset(const unsigned int* _Element_index) const
1936  {
1937  unsigned int currMultiplier = 1;
1938  unsigned int linearOffset = _M_linear_offset;
1939  for (int _I = static_cast<int>(_M_rank - 1); _I >= 0; _I--)
1940  {
1941  linearOffset += (currMultiplier * _Element_index[_I]);
1942  currMultiplier *= _M_base_extent[_I];
1943  }
1944 
1945  return linearOffset;
1946  }
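 // Worked example (illustrative): with rank 2, _M_linear_offset == 0 and a base
 // extent of {4, 8}, the element index {2, 3} maps to 2 * 8 + 3 == 19, i.e. the
 // least significant (last) dimension varies fastest.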
1947 
1948  private:
1949 
1950  unsigned int _M_rank;
1951  unsigned int _M_linear_offset;
1952  unsigned int *_M_base_extent;
1953  unsigned int *_M_view_offset;
1954  unsigned int *_M_view_extent;
1955  bool *_M_projection_info;
1956  };
1957 
1958  // This function creates a new _View_shape object from an existing _View_shape object when the data underlying the view
1959  // needs to be reinterpreted to use a different element size than the one used by the original view.
1960  inline
1961  _Ret_ _View_shape *_Create_reinterpreted_shape(const _View_shape* _Source_shape, size_t _Curr_elem_size, size_t _New_elem_size)
1962  {
1963  unsigned int _Rank = _Source_shape->_Get_rank();
1964  size_t _LinearOffsetInBytes = _Source_shape->_Get_linear_offset() * _Curr_elem_size;
1965  size_t _BaseLSDExtentInBytes = (_Source_shape->_Get_base_extent())[_Rank - 1] * _Curr_elem_size;
1966  size_t _ViewLSDOffsetInBytes = (_Source_shape->_Get_view_offset())[_Rank - 1] * _Curr_elem_size;
1967  size_t _ViewLSDExtentInBytes = (_Source_shape->_Get_view_extent())[_Rank - 1] * _Curr_elem_size;
1968 
1969  _ASSERTE((_LinearOffsetInBytes % _New_elem_size) == 0);
1970  _ASSERTE((_BaseLSDExtentInBytes % _New_elem_size) == 0);
1971  _ASSERTE((_ViewLSDOffsetInBytes % _New_elem_size) == 0);
1972  _ASSERTE((_ViewLSDExtentInBytes % _New_elem_size) == 0);
1973 
1974  size_t _Temp_val = _LinearOffsetInBytes / _New_elem_size;
1975  _ASSERTE(_Temp_val <= UINT_MAX);
1976  unsigned int _New_linear_offset = static_cast<unsigned int>(_Temp_val);
1977 
1978  std::vector<unsigned int> _New_base_extent(_Rank);
1979  std::vector<unsigned int> _New_view_offset(_Rank);
1980  std::vector<unsigned int> _New_view_extent(_Rank);
1981  for (unsigned int i = 0; i < _Rank - 1; ++i) {
1982  _New_base_extent[i] = (_Source_shape->_Get_base_extent())[i];
1983  _New_view_offset[i] = (_Source_shape->_Get_view_offset())[i];
1984  _New_view_extent[i] = (_Source_shape->_Get_view_extent())[i];
1985  }
1986 
1987  // The extent in the least significant dimension needs to be adjusted
1988  _Temp_val = _BaseLSDExtentInBytes / _New_elem_size;
1989  _ASSERTE(_Temp_val <= UINT_MAX);
1990  _New_base_extent[_Rank - 1] = static_cast<unsigned int>(_Temp_val);
1991 
1992  _Temp_val = _ViewLSDOffsetInBytes / _New_elem_size;
1993  _ASSERTE(_Temp_val <= UINT_MAX);
1994  _New_view_offset[_Rank - 1] = static_cast<unsigned int>(_Temp_val);
1995 
1996  _Temp_val = _ViewLSDExtentInBytes / _New_elem_size;
1997  _ASSERTE(_Temp_val <= UINT_MAX);
1998  _New_view_extent[_Rank - 1] = static_cast<unsigned int>(_Temp_val);
1999 
2000  return _View_shape::_Create_view_shape(_Rank, _New_linear_offset, _New_base_extent.data(), _New_view_offset.data(), _New_view_extent.data());
2001  }
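 // Worked example (illustrative): reinterpreting a rank-1 view over 16 floats
 // (_Curr_elem_size == 4) as bytes (_New_elem_size == 1) scales only the least
 // significant dimension, so a base/view extent of {16} becomes {64}, and the
 // linear and view offsets are converted from element counts to byte counts in
 // the same way.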
2002 
2003  inline _Access_mode _Get_cpu_access_mode(access_type cpu_access_type)
2004  {
2005  switch(cpu_access_type)
2006  {
2007  case access_type_auto:
2008  case access_type_read:
2009  return _Read_access;
2010  case access_type_write:
2011  return _Write_access;
2012  case access_type_read_write:
2013  return _Read_write_access;
2014  case access_type_none:
2015  default:
2016  _ASSERTE(false);
2017  return _No_access;
2018  }
2019  }
2020 
2021  inline access_type _Get_cpu_access_type(_Access_mode _Cpu_access_mode)
2022  {
2023  access_type _Cpu_access_type = access_type_none;
2024  if (_Cpu_access_mode & _Read_access) {
2025  _Cpu_access_type = static_cast<access_type>(_Cpu_access_type | access_type_read);
2026  }
2027 
2028  if (_Cpu_access_mode & _Write_access) {
2029  _Cpu_access_type = static_cast<access_type>(_Cpu_access_type | access_type_write);
2030  }
2031 
2032  return _Cpu_access_type;
2033  }
2034 
2035  // Class manages a raw buffer in an accelerator view
2036  class _Buffer : public _Reference_counter
2037  {
2038  friend class _CPU_accelerator_view_impl;
2039  friend class _D3D_accelerator_view_impl;
2040  friend class _D3D_temp_staging_cache;
2041 
2042  public:
2043 
2044  // Force construction through these static public method to ensure that _Buffer
2045  // objects are allocated in the runtime
2046 
2047  // Allocate a new buffer on the specified accelerator_view
2048  _AMPIMP static _Ret_ _Buffer * __cdecl _Create_buffer(accelerator_view _Accelerator_view, accelerator_view _Access_on_accelerator_view, size_t _Num_elems,
2049  size_t _Elem_size, bool _Is_temp = false, access_type _Cpu_access_type = access_type_auto);
2050 
2051  // Create a buffer object from a pre-allocated storage on the specified accelerator_view. This can be thought
2052  // of as the accelerator_view "adopting" the passed data buffer.
2053  _AMPIMP static _Ret_ _Buffer * __cdecl _Create_buffer(_In_ void *_Data_ptr, accelerator_view _Accelerator_view, size_t _Num_elems,
2054  size_t _Elem_size);
2055 
2056  // Create a staging buffer on the specified accelerator_view which can be accessed on the cpu upon mapping.
2057  _AMPIMP static _Ret_ _Buffer * __cdecl _Create_stage_buffer(accelerator_view _Accelerator_view, accelerator_view _Access_on_accelerator_view,
2058  size_t _Num_elems, size_t _Elem_size, bool _Is_temp = false);
2059 
2060  // Creates a temp staging buffer of the requested size. This function may create
2061  // a staging buffer smaller than the requested size.
2062  _AMPIMP static _Ret_ _Buffer * __cdecl _Get_temp_staging_buffer(accelerator_view _Av, size_t _Requested_num_elems, size_t _Elem_size);
2063 
2064  // Map a zero-copy or staging buffer for access on the CPU.
2065  _AMPIMP void _Map_buffer(_Access_mode _Map_type, bool _Wait);
2066 
2067  // Asynchronously map a zero-copy or staging buffer for access on the CPU.
2068  _AMPIMP _Event _Map_buffer_async(_Access_mode _Map_type);
2069 
2070  // Unmap a zero-copy or staging buffer denying CPU access
2071  _AMPIMP void _Unmap_buffer();
2072 
2073  // Copy data to _Dest asynchronously.
2074  _AMPIMP _Event _Copy_to_async(_Out_ _Buffer * _Dest, size_t _Num_elems, size_t _Src_offset = 0, size_t _Dest_offset = 0);
2075 
2076  // Copy data to _Dest asynchronously.
2077  _AMPIMP _Event _Copy_to_async(_Out_ _Buffer * _Dest, _View_shape_ptr _Src_shape, _View_shape_ptr _Dest_shape);
2078 
2079  _AMPIMP accelerator_view _Get_accelerator_view() const;
2080  _AMPIMP accelerator_view _Get_access_on_accelerator_view() const;
2081 
2082  _AMPIMP void _Register_view(_In_ _View_key _Key);
2083  _AMPIMP void _Unregister_view(_In_ _View_key _Key);
2084 
2085  // Return the raw data ptr - only an accelerator view implementation can interpret
2086  // this raw pointer. This method should usually not be used in the AMP header files
2087  // _Get_host_ptr is the right way for accessing the host accessible ptr for a buffer
2088  _Ret_ void * _Get_data_ptr() const
2089  {
2090  return _M_data_ptr;
2091  }
2092 
2093  // Returns the host accessible ptr corresponding to the buffer. This would
2094  // return NULL when the buffer is inaccessible on the CPU
2095  _Ret_ void * _Get_host_ptr() const
2096  {
2097  return _M_host_ptr;
2098  }
2099 
2100  size_t _Get_elem_size() const
2101  {
2102  return _M_elem_size;
2103  }
2104 
2105  size_t _Get_num_elems() const
2106  {
2107  return _M_num_elems;
2108  }
2109 
2110  _Ret_ _Accelerator_view_impl* _Get_accelerator_view_impl() const
2111  {
2112  return _M_accelerator_view;
2113  }
2114 
2115  _Ret_ _Accelerator_view_impl* _Get_access_on_accelerator_view_impl() const
2116  {
2117  return _M_access_on_accelerator_view;
2118  }
2119 
2120  bool _Owns_data() const
2121  {
2122  return _M_owns_data;
2123  }
2124 
2125  _AMPIMP bool _Exclusively_owns_data();
2126 
2127  bool _Is_staging() const
2128  {
2129  return _M_is_staging;
2130  }
2131 
2132  _Access_mode _Get_allowed_host_access_mode() const
2133  {
2134  return _M_allowed_host_access_mode;
2135  }
2136 
2137  access_type _Get_allowed_host_access_type() const
2138  {
2139  return _Get_cpu_access_type(_M_allowed_host_access_mode);
2140  }
2141 
2142  bool _Is_host_accessible(_Access_mode _Requested_access_mode) const
2143  {
2144  return ((_Get_allowed_host_access_mode() & _Requested_access_mode) == _Requested_access_mode);
2145  }
2146 
2147  _Access_mode _Get_current_host_access_mode() const
2148  {
2149  return _M_current_host_access_mode;
2150  }
2151 
2152  bool _Is_temp() const
2153  {
2154  return _M_is_temp;
2155  }
2156 
2157  bool _Is_adopted() const
2158  {
2159  // Is it adopted from interop?
2160  return _M_is_adopted;
2161  }
2162 
2163  bool _Is_buffer() const
2164  {
2165  return _M_is_buffer;
2166  }
2167 
2168  _AMPIMP bool _Is_mappable() const;
2169 
2170  protected:
2171 
2172  // The _Buffer constructor is protected to force construction through the static
2173  // _Create_buffer method to ensure the object is allocated in the runtime
2174  _Buffer(_In_ _Accelerator_view_impl* _Av, _In_ void *_Buffer_data_ptr, _In_ void * _Host_ptr,
2175  _Access_mode _Allowed_host_access_mode, _Access_mode _Current_host_access_mode, size_t _Num_elems,
2176  size_t _Elem_size, bool _Owns_data, bool _Is_staging, bool _Is_temp, bool _Is_adopted);
2177 
2178  // protected destructor to force deletion through _Release
2179  virtual ~_Buffer();
2180 
2181  // No default constructor, copy constructor and assignment operator
2182  _Buffer();
2183  _Buffer(const _Buffer &rhs);
2184  _Buffer &operator=(const _Buffer &rhs);
2185 
2186  void _Set_host_ptr(_In_ void *_Host_ptr, _Access_mode _Host_access_mode = _No_access)
2187  {
2188  _ASSERTE((_Host_ptr == NULL) || (_Host_access_mode != _No_access));
2189 
2190  _M_host_ptr = _Host_ptr;
2191  if (_Host_ptr == NULL) {
2192  _M_current_host_access_mode = _No_access;
2193  }
2194  else {
2195  _M_current_host_access_mode = _Host_access_mode;
2196  }
2197  }
2198 
2199  void _Set_data_ptr(_In_ IUnknown *_Data_ptr)
2200  {
2201  _M_data_ptr = _Data_ptr;
2202  }
2203 
2204  protected:
2205  _Accelerator_view_impl_ptr _M_accelerator_view;
2206  _Accelerator_view_impl_ptr _M_access_on_accelerator_view;
2207  void * _M_data_ptr;
2208  void * _M_host_ptr;
2215 
2216  // Used to determine how to map the staging buffer after it is involved in a copy
2218 
2221  private:
2222  // A set of view_keys to invalidate whenever the host ptr of a staging buffer is invalidated
2223  std::unique_ptr<std::unordered_set<_View_key>> _M_view_keys;
2225  };
2226 
2227  // Class manages a texture in an accelerator view
2228  class _Texture : public _Buffer
2229  {
2230  friend class _CPU_accelerator_view_impl;
2231  friend class _D3D_accelerator_view_impl;
2232  friend class _D3D_temp_staging_cache;
2233 
2234  public:
2235 
2236  // Allocate a new texture on the specified accelerator_view
2237  _AMPIMP static _Ret_ _Texture * __cdecl _Create_texture(accelerator_view _Accelerator_view,
2238  unsigned int _Rank,
2239  size_t _Width, size_t _Height, size_t _Depth,
2240  unsigned int _Mip_levels,
2241  _Short_vector_base_type_id _Type_id,
2242  unsigned int _Num_channels,
2243  unsigned int _Bits_per_channel,
2244  bool _Is_temp = false);
2245 
2246  // Create a texture object from a pre-allocated storage on the specified accelerator_view. This can be thought
2247  // of as the accelerator_view "adopting" the passed data buffer.
2248  _AMPIMP static _Ret_ _Texture * __cdecl _Adopt_texture(unsigned int _Rank, _Texture_base_type_id _Id,
2249  _In_ IUnknown *_Data_ptr, accelerator_view _Accelerator_view,
2250  unsigned int _View_format);
2251 
2252  // Create a staging texture on the specified accelerator_view which can be accessed on the cpu upon mapping.
2253  _AMPIMP static _Ret_ _Texture * __cdecl _Create_stage_texture(accelerator_view _Accelerator_view, accelerator_view _Access_on_accelerator_view,
2254  unsigned int _Rank,
2255  size_t _Width, size_t _Height, size_t _Depth,
2256  unsigned int _Mip_levels,
2257  unsigned int _Format,
2258  bool _Is_temp = false);
2259 
2260  // Create a staging texture on the specified accelerator_view which can be accessed on the cpu upon mapping.
2261  _AMPIMP static _Ret_ _Texture * __cdecl _Create_stage_texture(accelerator_view _Accelerator_view, accelerator_view _Access_on_accelerator_view,
2262  unsigned int _Rank,
2263  size_t _Width, size_t _Height, size_t _Depth,
2264  unsigned int _Mip_levels,
2265  _Short_vector_base_type_id _Type_id,
2266  unsigned int _Num_channels,
2267  unsigned int _Bits_per_channel);
2268 
2269  // Creates a temp staging texture. This function may create
2270  // a staging texture smaller than the requested size.
2271  _AMPIMP static _Ret_ _Texture * __cdecl _Get_temp_staging_texture(accelerator_view _Accelerator_view,
2272  unsigned int _Rank,
2273  size_t _Width, size_t _Height, size_t _Depth,
2274  unsigned int _Mip_levels,
2275  unsigned int _Format);
2276 
2277  // Constructs a new texture with the same properties as the given texture.
2278  _AMPIMP static _Ret_ _Texture * __cdecl _Clone_texture(const _Texture *_Src, const accelerator_view &_Accelerator_view, const accelerator_view &_Associated_av);
2279 
2280  // Copy data to _Dest asynchronously for textures. The two textures must have been created with
2281  // compatible physical formats.
2282  _AMPIMP _Event _Copy_to_async(_Out_ _Texture * _Dest, const size_t *_Copy_extent,
2283  const size_t *_Src_offset, const size_t *_Dst_offset,
2284  unsigned int _Src_mipmap_level, unsigned int _Dst_mipmap_level);
2285 
2286  size_t _Get_width(unsigned int _Mip_offset = 0) const
2287  {
2288  return (_M_width >> _Mip_offset) ? (_M_width >> _Mip_offset) : 1U;
2289  }
2290 
2291  size_t _Get_height(unsigned int _Mip_offset = 0) const
2292  {
2293  return (_M_height >> _Mip_offset) ? (_M_height >> _Mip_offset) : 1U;
2294  }
2295 
2296  size_t _Get_depth(unsigned int _Mip_offset = 0) const
2297  {
2298  return (_M_depth >> _Mip_offset) ? (_M_depth >> _Mip_offset) : 1U;
2299  }
2300 
2301  unsigned int _Get_rank() const
2302  {
2303  return _M_rank;
2304  }
2305 
2306  unsigned int _Get_texture_format() const
2307  {
2308  return _M_texture_format;
2309  }
2310 
2311  unsigned int _Get_view_format() const
2312  {
2313  return _M_view_format;
2314  }
2315 
2316  unsigned int _Get_num_channels() const
2317  {
2318  return _M_num_channels;
2319  }
2320 
2321  unsigned int _Get_bits_per_channel() const
2322  {
2323  // For texture adopted from interop, return 0.
2324  return _Is_adopted() ? 0 : _M_bits_per_channel;
2325  }
2326 
2327  unsigned int _Get_bits_per_element() const
2328  {
2329  return _M_bits_per_channel * _M_num_channels;
2330  }
2331 
2332  unsigned int _Get_data_length(unsigned int _Most_detailed_mipmap_level, unsigned int _View_mipmap_levels, const size_t *_Extents = nullptr) const // in bytes
2333  {
2334  _ASSERTE(_View_mipmap_levels);
2335 
2336  unsigned long long _Bits_per_byte = 8ULL;
2337  unsigned long long _Total_bytes = 0ULL;
2338 
2339  unsigned int _Mip_level = _Most_detailed_mipmap_level;
2340 
2341  // Sum up data length (in bytes) of all mipmap levels in the view
2342  for (unsigned int _Mip_offset=0; _Mip_offset < _View_mipmap_levels; ++_Mip_offset)
2343  {
2344  unsigned long long _Width = 1ULL;
2345  unsigned long long _Height = 1ULL;
2346  unsigned long long _Depth = 1ULL;
2347 
2348  if (_Extents)
2349  {
2350  switch (_M_rank)
2351  {
2352  case 3:
2353  _Depth = (_Extents[2] >> _Mip_level) ? (_Extents[2] >> _Mip_level) : 1U;
2354  // deliberately fall thru
2355  case 2:
2356  _Height = (_Extents[1] >> _Mip_level) ? (_Extents[1] >> _Mip_level) : 1U;
2357  // deliberately fall thru
2358  case 1:
2359  _Width = (_Extents[0] >> _Mip_level) ? (_Extents[0] >> _Mip_level) : 1U;
2360  break;
2361  default:
2362  _ASSERTE(false); // textures are only rank 1-3
2363  }
2364  }
2365  else
2366  {
2367  _Width = _Get_width(_Mip_level);
2368  _Height = _Get_height(_Mip_level);
2369  _Depth = _Get_depth(_Mip_level);
2370  }
2371 
2372  // Note _Get_bits_per_element() can be smaller than 8
2373  // Use unsigned long long to avoid integer overflow
2374  _Total_bytes += ((_Width * _Height * _Depth * static_cast<unsigned long long>(_Get_bits_per_element())) + _Bits_per_byte - 1) / _Bits_per_byte;
2375 
2376  _Mip_level++;
2377  }
2378 
2379  return static_cast<unsigned int>(_Total_bytes);
2380  }
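 // Worked example (illustrative): for a rank-2, single-channel, 8-bit texture of
 // 16 x 16 texels, a view with _Most_detailed_mipmap_level == 0 and
 // _View_mipmap_levels == 2 sums 16*16 = 256 bytes for level 0 and 8*8 = 64 bytes
 // for level 1, giving 320 bytes in total.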
2381 
2382  unsigned int _Get_mip_levels() const
2383  {
2384  return _M_mip_levels;
2385  }
2386 
2387  size_t _Get_row_pitch() const
2388  {
2389  return _M_row_pitch;
2390  }
2391 
2392  void _Set_row_pitch(size_t _Val)
2393  {
2394  _M_row_pitch = _Val;
2395  }
2396 
2397  size_t _Get_depth_pitch() const
2398  {
2399  return _M_depth_pitch;
2400  }
2401 
2402  void _Set_depth_pitch(size_t _Val)
2403  {
2404  _M_depth_pitch = _Val;
2405  }
2406 
2407  private:
2408 
2409  // The _Texture constructor is private to force construction through the static
2410  // _Create_texture method to ensure the object is allocated in the runtime
2411  _Texture(_In_ _Accelerator_view_impl* _Av, _In_ void *_Texture_data_ptr, _In_ void * _Host_ptr,
2412  _Access_mode _Allowed_host_access_mode, _Access_mode _Current_host_access_mode,
2413  unsigned int _Rank,
2414  size_t _Width, size_t _Height, size_t _Depth,
2415  unsigned int _Mip_levels,
2416  unsigned int _Texture_format,
2417  unsigned int _View_format,
2418  unsigned int _Num_channels,
2419  unsigned int _Bits_per_channel,
2420  bool _Owns_data, bool _Is_staging, bool _Is_temp, bool _Is_adopted);
2421 
2422  // Private destructor to force deletion through _Release
2423  ~_Texture();
2424 
2425  // No default constructor, copy constructor and assignment operator
2426  _Texture();
2427  _Texture(const _Texture &rhs);
2428  _Texture &operator=(const _Texture &rhs);
2429 
2430  // Texture only
2431  unsigned int _M_rank;
2432  size_t _M_width;
2433  size_t _M_height;
2434  size_t _M_depth;
2435  unsigned int _M_texture_format;
2436  unsigned int _M_view_format;
2437  unsigned int _M_bits_per_channel;
2438  unsigned int _M_num_channels;
2439  unsigned int _M_mip_levels;
2440 
2441  size_t _M_row_pitch;
2442  size_t _M_depth_pitch;
2443  };
2444 
2445  class _Sampler : public _Reference_counter
2446  {
2447  public:
2448  // Create a new sampler with configurations exposed by C++ AMP.
2449  _AMPIMP static _Ret_ _Sampler * __cdecl _Create(
2450  unsigned int _Filter_mode,
2451  unsigned int _Address_mode,
2452  float _Border_r,
2453  float _Border_g,
2454  float _Border_b,
2455  float _Border_a);
2456 
2457  // Create a sampler object given an adopted opaque data pointer
2458  _AMPIMP static _Ret_ _Sampler * __cdecl _Create(_In_ void *_Data_ptr);
2459 
2460  // Return the raw data ptr - only an accelerator view implementation can interpret
2461  // this raw pointer. This method should usually not be used in the AMP header files
2462  _Ret_ void * _Get_data_ptr() const
2463  {
2464  return _M_data_ptr;
2465  }
2466 
2467  bool _Is_adopted() const
2468  {
2469  // Is it adopted from interop?
2470  return _M_is_adopted;
2471  }
2472 
2473  unsigned int _Get_filter_mode() const
2474  {
2475  return _M_filter_mode;
2476  }
2477 
2478  unsigned int _Get_address_mode() const
2479  {
2480  return _M_address_mode;
2481  }
2482 
2483  const float* _Get_border_color() const
2484  {
2485  return &_M_border_color[0];
2486  }
2487 
2488  private:
2489  // The _Sampler constructor is private to force construction through the static
2490  // _Create method to ensure the object is allocated in the runtime
2491  _Sampler(unsigned int _Filter_mode, unsigned int _Address_mode, float _Border_r, float _Border_g, float _Border_b, float _Border_a);
2492 
2493  _Sampler(_In_ void *_Data_ptr);
2494 
2495  // Private destructor to force deletion through _Release
2496  ~_Sampler();
2497 
2498  // No default constructor, copy constructor, or assignment operator
2499  _Sampler();
2500  _Sampler(const _Sampler &rhs);
2501  _Sampler &operator=(const _Sampler &rhs);
2502 
2503  void * _M_data_ptr;
2504  bool _M_is_adopted;
2505  unsigned int _M_filter_mode;
2506  unsigned int _M_address_mode;
2507  float _M_border_color[4];
2508  };
2509 
2510  // Forward declaration for copy helper functions
2511  _AMPIMP _Event __cdecl _Copy_impl(_In_ _Buffer *_Src, size_t _Src_offset,
2512  _Out_ _Buffer * _Dst, size_t _Dest_offset,
2513  size_t _Num_elems, size_t _Preferred_copy_chunk_num_elems = 0);
2514 
2515  _AMPIMP _Event __cdecl _Copy_async_impl(_In_ _Texture *_Src_tex, const size_t *_Src_offset, unsigned int _Src_mipmap_level,
2516  _Out_ _Texture *_Dst_tex, const size_t *_Dst_offset, unsigned int _Dst_mipmap_level,
2517  const size_t *_Copy_extent, const size_t *_Preferred_copy_chunk_extent = NULL);
2518 
2519  inline bool _Get_chunked_staging_texture(_In_ _Texture* _Tex, const size_t *_Copy_chunk_extent, _Inout_ size_t *_Remaining_copy_extent, _Out_ size_t *_Curr_copy_extent, _Out_ _Texture_ptr *_Staging_texture)
2520  {
2521  bool _Truncated_copy = false;
2522  size_t _Allocation_extent[3] = { _Copy_chunk_extent[0], _Copy_chunk_extent[1], _Copy_chunk_extent[2] };
2523 
2524  unsigned int _Most_sig_idx = _Tex->_Get_rank() - 1;
2525 
2526  if (_Allocation_extent[_Most_sig_idx] > _Remaining_copy_extent[_Most_sig_idx]) {
2527  _Allocation_extent[_Most_sig_idx] = _Remaining_copy_extent[_Most_sig_idx];
2528  }
2529 
2530  _Texture_ptr _Stage = _Texture::_Get_temp_staging_texture(_Tex->_Get_accelerator_view(), _Tex->_Get_rank(),
2531  _Allocation_extent[0], _Allocation_extent[1], _Allocation_extent[2],
2532  /*_Mip_levels=*/1, _Tex->_Get_texture_format());
2533 
2534  std::copy(&_Allocation_extent[0], &_Allocation_extent[3], stdext::make_unchecked_array_iterator(&_Curr_copy_extent[0]));
2535  size_t _Staging_tex_extent[3] = {_Stage->_Get_width(), _Stage->_Get_height(), _Stage->_Get_depth()};
2536  if (_Curr_copy_extent[_Most_sig_idx] > _Staging_tex_extent[_Most_sig_idx]) {
2537  _Curr_copy_extent[_Most_sig_idx] = _Staging_tex_extent[_Most_sig_idx];
2538  }
2539 
2540  // The truncation, however, can happen only in the most significant dimension; lower
2541  // dimensions should not get truncated
2542  if (_Curr_copy_extent[_Most_sig_idx] < _Remaining_copy_extent[_Most_sig_idx])
2543  {
2544  _Remaining_copy_extent[_Most_sig_idx] -= _Curr_copy_extent[_Most_sig_idx];
2545  _Truncated_copy = true;
2546  }
2547 
2548  for (unsigned int _I = 0; _I < _Most_sig_idx; _I++)
2549  {
2550  _ASSERTE(_Curr_copy_extent[_I] == _Remaining_copy_extent[_I]);
2551  }
2552 
2553  *_Staging_texture = _Stage;
2554  return _Truncated_copy;
2555  }
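  // Worked example (illustration only; extents are assumed): for a rank-3 copy with a
  // remaining extent of 256 x 256 x 64 and a preferred chunk extent of 256 x 256 x 16,
  // the staging texture above is allocated as 256 x 256 x 16, the current copy extent is
  // clipped to 16 slices in the most significant dimension only, the remaining extent
  // drops to 256 x 256 x 48, and the function returns true so the caller issues the next
  // chunk of the copy.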
2556 
2557  #pragma warning ( push )
2558  #pragma warning ( disable : 6101 )
2559  // Suppress "warning C6101: Returning uninitialized memory '*_Dst'.: A successful"
2560  // "path through the function does not set the named _Out_ parameter."
2561  // The callers of _Copy_data_on_host all have a static_assert that _Rank must be 1, 2, or 3 for textures
2562  //
2563  template <typename _Input_iterator, typename _Value_type>
2564  inline void _Copy_data_on_host(int _Rank, _Input_iterator _Src, _Out_ _Value_type *_Dst,
2565  size_t _Width, size_t _Height, size_t _Depth,
2566  size_t _Dst_row_pitch_in_bytes, size_t _Dst_depth_pitch_in_bytes,
2567  size_t _Src_row_pitch, size_t _Src_depth_pitch)
2568  {
2569  switch(_Rank)
2570  {
2571  case 1:
2572  {
2573  _Input_iterator _End = _Src;
2574  std::advance(_End, _Width);
2575  std::copy(_Src, _End, stdext::make_unchecked_array_iterator(_Dst));
2576  }
2577  break;
2578  case 2:
2579  {
2580  unsigned char *_Dst_ptr = reinterpret_cast<unsigned char *>(_Dst);
2581  _Input_iterator _Src_start = _Src;
2582  for (size_t _I = 0; _I < _Height; _I++)
2583  {
2584  _Input_iterator _Src_end = _Src_start;
2585  std::advance(_Src_end, _Width);
2586 
2587  std::copy(_Src_start, _Src_end, stdext::make_unchecked_array_iterator(reinterpret_cast<_Value_type*>(_Dst_ptr)));
2588 
2589  _Dst_ptr += _Dst_row_pitch_in_bytes;
2590  std::advance(_Src_start, _Src_row_pitch);
2591  }
2592  }
2593  break;
2594  case 3:
2595  {
2596  unsigned char *_Dst_ptr_slice_start = reinterpret_cast<unsigned char *>(_Dst);
2597  _Input_iterator _Src_depth_slice_start = _Src;
2598  for (size_t _I = 0; _I < _Depth; _I++)
2599  {
2600  _Input_iterator _Src_start = _Src_depth_slice_start;
2601  unsigned char *_Dst_ptr = _Dst_ptr_slice_start;
2602 
2603  for (size_t _J = 0; _J < _Height; _J++)
2604  {
2605  _Input_iterator _Src_end = _Src_start;
2606  std::advance(_Src_end, _Width);
2607 
2608  std::copy(_Src_start, _Src_end, stdext::make_unchecked_array_iterator(reinterpret_cast<_Value_type*>(_Dst_ptr)));
2609 
2610  _Dst_ptr += _Dst_row_pitch_in_bytes;
2611  std::advance(_Src_start, _Src_row_pitch);
2612  }
2613 
2614  _Dst_ptr_slice_start += _Dst_depth_pitch_in_bytes;
2615  std::advance(_Src_depth_slice_start, _Src_depth_pitch);
2616  }
2617  }
2618  break;
2619  default:
2620  _ASSERTE(FALSE);
2621  break;
2622  }
2623  }
2624  #pragma warning ( pop ) // disable : 6101
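  // Illustrative sketch (not part of the runtime): the rank-2 branch above, written with
  // plain arrays. The destination rows are a fixed pitch apart, which may be wider than
  // the row payload, while the hypothetical source is densely packed. Assumes <algorithm>
  // for std::copy, which is already used throughout this file.
  inline void _Example_pitched_copy_2d()
  {
      const size_t _Width = 3, _Height = 2;
      const size_t _Dst_row_pitch_in_bytes = 16;                 // assumed pitch > _Width * sizeof(int)
      int _Src[_Width * _Height] = { 1, 2, 3, 4, 5, 6 };
      alignas(int) unsigned char _Dst[_Dst_row_pitch_in_bytes * _Height] = {};

      unsigned char *_Dst_ptr = _Dst;
      const int *_Src_row = _Src;
      for (size_t _I = 0; _I < _Height; ++_I)
      {
          std::copy(_Src_row, _Src_row + _Width, reinterpret_cast<int *>(_Dst_ptr));
          _Dst_ptr += _Dst_row_pitch_in_bytes;                   // skip the padding at the end of each row
          _Src_row += _Width;                                    // source rows are densely packed
      }
  }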
2625 
2626  template <typename _Output_iterator, typename _Value_type>
2627  inline void _Copy_data_on_host(int _Rank, const _Value_type * _Src, _Output_iterator _Dst,
2628  size_t _Width, size_t _Height, size_t _Depth,
2629  size_t _Src_row_pitch_in_bytes, size_t _Src_depth_pitch_in_bytes,
2630  size_t _Dst_row_pitch, size_t _Dst_depth_pitch)
2631  {
2632  switch(_Rank)
2633  {
2634  case 1:
2635  {
2636  const _Value_type * _End = _Src + _Width;
2637  std::copy(stdext::make_unchecked_array_iterator(_Src), stdext::make_unchecked_array_iterator(_End), _Dst);
2638  }
2639  break;
2640  case 2:
2641  {
2642  const unsigned char *_Src_ptr = reinterpret_cast<const unsigned char *>(_Src);
2643  _Output_iterator _Dst_iter = _Dst;
2644  for (size_t _I = 0; _I < _Height; _I++)
2645  {
2646  const _Value_type * _Src_end = reinterpret_cast<const _Value_type*>(_Src_ptr) + _Width;
2647 
2648  std::copy(stdext::make_unchecked_array_iterator(reinterpret_cast<const _Value_type*>(_Src_ptr)), stdext::make_unchecked_array_iterator(_Src_end), _Dst_iter);
2649  std::advance(_Dst_iter, _Dst_row_pitch);
2650  _Src_ptr += _Src_row_pitch_in_bytes;
2651  }
2652  }
2653  break;
2654  case 3:
2655  {
2656  const unsigned char *_Src_ptr_slice_start = reinterpret_cast<const unsigned char *>(_Src);
2657  _Output_iterator _Dst_depth_slice_start = _Dst;
2658  for (size_t _I = 0; _I < _Depth; _I++)
2659  {
2660  _Output_iterator _Dst_iter = _Dst_depth_slice_start;
2661  const unsigned char *_Src_ptr = _Src_ptr_slice_start;
2662 
2663  for (size_t _J = 0; _J < _Height; _J++)
2664  {
2665  const _Value_type * _Src_end = reinterpret_cast<const _Value_type *>(_Src_ptr) + _Width;
2666 
2667  std::copy(stdext::make_unchecked_array_iterator(reinterpret_cast<const _Value_type*>(_Src_ptr)), stdext::make_unchecked_array_iterator(_Src_end), _Dst_iter);
2668 
2669  std::advance(_Dst_iter, _Dst_row_pitch);
2670  _Src_ptr += _Src_row_pitch_in_bytes;
2671  }
2672 
2673  _Src_ptr_slice_start += _Src_depth_pitch_in_bytes;
2674  std::advance(_Dst_depth_slice_start, _Dst_depth_pitch);
2675  }
2676  }
2677  break;
2678  default:
2679  _ASSERTE(FALSE);
2680  break;
2681  }
2682  }
2683 
2684  _AMPIMP size_t __cdecl _Get_preferred_copy_chunk_size(size_t _Total_copy_size_in_bytes);
2685 
2686  inline size_t _Get_preferred_copy_chunk_num_elems(size_t _Total_num_elems, size_t _Elem_size)
2687  {
2688  size_t preferredChunkSize = _Get_preferred_copy_chunk_size(_Total_num_elems * _Elem_size);
2689 
2690  return (preferredChunkSize / _Elem_size);
2691  }
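  // Worked example (illustration only; the chunk size is an assumption): if
  // _Get_preferred_copy_chunk_size were to return 4 MB for a copy of 16,777,216 4-byte
  // elements (64 MB), the helper above would yield 4 MB / 4 = 1,048,576 elements per
  // chunk, so the copy would proceed in 16 staged chunks.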
2692 
2693  inline void _Get_preferred_copy_chunk_extent(unsigned int _Rank, size_t _Width, size_t _Height,
2694  size_t _Depth, size_t _Bits_per_element, _Out_writes_(3) size_t *_Preferred_copy_chunk_extent)
2695  {
2696  _ASSERTE(_Preferred_copy_chunk_extent != nullptr);
2697 
2698  size_t requestedByteSize = static_cast<size_t>((static_cast<unsigned long long>(_Width) *
2699  static_cast<unsigned long long>(_Height) *
2700  static_cast<unsigned long long>(_Depth) *
2701  static_cast<unsigned long long>(_Bits_per_element)) >> 3);
2702 
2703  size_t preferredChunkSize = _Get_preferred_copy_chunk_size(requestedByteSize);
2704 
2705  // Let's align the allocation size to the element size of the texture
2706  size_t preferredCopyChunkNumElems = static_cast<size_t>((static_cast<unsigned long long>(preferredChunkSize) * 8U) / _Bits_per_element);
2707 
2708  // Let's truncate the dimensions of the requested staging texture.
2709  // We only truncate in the most significant dimension
2710  switch (_Rank)
2711  {
2712  case 1:
2713  _Width = preferredCopyChunkNumElems;
2714  break;
2715  case 2:
2716  _Height = (preferredCopyChunkNumElems + _Width - 1) / _Width;
2717  break;
2718  case 3:
2719  _Depth = (preferredCopyChunkNumElems + (_Height * _Width) - 1) / (_Height * _Width);
2720  break;
2721  default:
2722  _ASSERTE(false);
2723  }
2724 
2725  _Preferred_copy_chunk_extent[0] = _Width;
2726  _Preferred_copy_chunk_extent[1] = _Height;
2727  _Preferred_copy_chunk_extent[2] = _Depth;
2728  }
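  // Illustrative sketch (not part of the runtime; all sizes are assumptions): the rank-3
  // truncation above written out with plain numbers. A hypothetical 4 MB preferred chunk
  // is used here; the real value comes from _Get_preferred_copy_chunk_size.
  inline void _Example_chunk_extent_rank3(size_t (&_Extent)[3])
  {
      const size_t _Width = 256, _Height = 256, _Bits_per_element = 32;
      const size_t _Hypothetical_chunk_bytes = 4 * 1024 * 1024;
      const size_t _Chunk_elems = (_Hypothetical_chunk_bytes * 8) / _Bits_per_element;  // 1,048,576 elements
      _Extent[0] = _Width;                                                              // lower dimensions unchanged
      _Extent[1] = _Height;
      _Extent[2] = (_Chunk_elems + (_Height * _Width) - 1) / (_Height * _Width);        // depth truncated to 16
  }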
2729 
2730  // Finds the greatest common divisor of 2 unsigned integral numbers using Euclid's algorithm
2731  template <typename _T>
2732  inline _T _Greatest_common_divisor(_T _M, _T _N)
2733  {
2734  static_assert(std::is_unsigned<_T>::value, "This GCD function only supports unsigned integral types");
2735 
2736  _ASSERTE((_M > 0) && (_N > 0));
2737 
2738  if (_N > _M) {
2739  std::swap(_N , _M);
2740  }
2741 
2742  _T _Temp;
2743  while (_N > 0)
2744  {
2745  _Temp = _N;
2746  _N = _M % _N;
2747  _M = _Temp;
2748  }
2749 
2750  return _M;
2751  }
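  // Worked example (illustration only): _Greatest_common_divisor(24u, 36u) first swaps the
  // operands so that _M = 36 and _N = 24, then iterates (36, 24) -> (24, 12) -> (12, 0)
  // and returns 12.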
2752 
2753  // Finds the least common multiple of 2 unsigned integral numbers using their greatest_common_divisor
2754  template <typename _T>
2755  inline _T _Least_common_multiple(_T _M, _T _N)
2756  {
2757  static_assert(std::is_unsigned<_T>::value, "This LCM function only supports unsigned integral types");
2758 
2759  _ASSERTE((_M > 0) && (_N > 0));
2760 
2761  _T _Gcd = _Greatest_common_divisor(_M, _N);
2762  return ((_M / _Gcd) * _N);
2763  }
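  // Illustrative sketch (not part of the runtime; both element sizes are assumptions):
  // how the copy helpers in this file use the LCM to keep chunk boundaries aligned to both
  // the destination element size and sizeof(_Value_type). With 6-byte destination elements
  // and 4-byte source elements the LCM is 12 bytes, so chunk lengths are rounded down to
  // a multiple of 12 / 4 = 3 source elements.
  inline size_t _Example_align_chunk_to_both_elem_sizes(size_t _Chunk_num_elems)
  {
      const size_t _Dst_elem_size = 6;                                                // hypothetical
      const size_t _Src_elem_size = 4;                                                // hypothetical sizeof(_Value_type)
      const size_t _Lcm = _Least_common_multiple(_Dst_elem_size, _Src_elem_size);     // 12
      const size_t _Adjustment_ratio = _Lcm / _Src_elem_size;                         // 3
      return (_Chunk_num_elems / _Adjustment_ratio) * _Adjustment_ratio;
  }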
2764 
2765  template <typename InputIterator, typename _Value_type>
2766  inline _Event _Copy_impl(InputIterator _SrcFirst, InputIterator _SrcLast, size_t _NumElemsToCopy,
2767  _Out_ _Buffer * _Dst, size_t _Dest_offset, size_t _Preferred_copy_chunk_num_elems = 0)
2768  {
2769  if (_NumElemsToCopy == 0) {
2770  return _Event();
2771  }
2772 
2773  if (_Dst == NULL) {
2774  throw runtime_exception("Failed to copy to buffer.", E_INVALIDARG);
2775  }
2776 
2777 #pragma warning ( push )
2778 #pragma warning ( disable : 6001 ) // Using uninitialized memory '*_Dst'
2779  if (((_NumElemsToCopy * sizeof(_Value_type)) + (_Dest_offset * _Dst->_Get_elem_size())) > (_Dst->_Get_num_elems() * _Dst->_Get_elem_size()))
2780  {
2781  throw runtime_exception("Invalid _Src argument(s). _Src size exceeds total size of the _Dest.", E_INVALIDARG);
2782  }
2783 #pragma warning ( pop )
2784 
2785  _ASSERTE(_NumElemsToCopy == (size_t)(std::distance(_SrcFirst, _SrcLast)));
2786 
2787  // If the dest is host accessible for write then we do the copy on
2788  // accelerator(accelerator::cpu_accelerator).default_view
2789  if (_Dst->_Is_host_accessible(_Write_access))
2790  {
2791  // Let's first map the _Dst buffer
2792  _Event _Ev = _Dst->_Map_buffer_async(_Write_access);
2793 
2794  // The _Dest is accessible on host. We just need to do a std::copy using a raw pointer as OutputIterator
2795  _Buffer_ptr _PDestBuf = _Dst;
2796  _Ev = _Ev._Add_continuation(std::function<_Event()>([_PDestBuf,_Dest_offset, _SrcFirst, _SrcLast]() mutable -> _Event
2797  {
2798  _Value_type *_DestPtr = reinterpret_cast<_Value_type*>(reinterpret_cast<char*>(_PDestBuf->_Get_host_ptr()) + (_Dest_offset * _PDestBuf->_Get_elem_size()));
2799  std::copy(_SrcFirst, _SrcLast, stdext::make_unchecked_array_iterator(_DestPtr));
2800 
2801  return _Event();
2802  }));
2803 
2804  return _Ev;
2805  }
2806  else
2807  {
2808  // _Dest is on a device. Let's create a temp staging buffer on the _Dest accelerator_view and copy the input over
2809  // We may create a staging buffer of size smaller than the copy size and in that case we will perform the copy
2810  // as a series of smaller copies
2811  _Buffer_ptr _PDestBuf = _Dst;
2812  size_t _NumElemsToCopyRemaining = _NumElemsToCopy;
2813  size_t _PreferredNumElemsToCopyPerChunk = _Preferred_copy_chunk_num_elems;
2814  if (_PreferredNumElemsToCopyPerChunk == 0) {
2815  // If a preferred copy chunk size was not specified, let's pick one based on the
2816  // size of the copy
2817  _PreferredNumElemsToCopyPerChunk = _Get_preferred_copy_chunk_num_elems(_NumElemsToCopy, sizeof(_Value_type));
2818  }
2819  size_t _CurrDstOffset = _Dest_offset;
2820  InputIterator _CurrStartIter = _SrcFirst;
2821  _Event _Ev;
2822 
2823  size_t _Lcm = _Least_common_multiple(_Dst->_Get_elem_size(), sizeof(_Value_type));
2824  size_t _AdjustmentRatio = _Lcm / sizeof(_Value_type);
2825 
2826  do
2827  {
2828  size_t _AllocationNumElems = _PreferredNumElemsToCopyPerChunk;
2829  if (_NumElemsToCopyRemaining < _AllocationNumElems) {
2830  _AllocationNumElems = _NumElemsToCopyRemaining;
2831  }
2832 
2833  _Buffer_ptr _PDestStagingBuf = _Buffer::_Get_temp_staging_buffer(_Dst->_Get_accelerator_view(),
2834  _AllocationNumElems, sizeof(_Value_type));
2835 
2836  _ASSERTE(_PDestStagingBuf != NULL);
2837  _ASSERTE(_PDestStagingBuf->_Get_elem_size() == sizeof(_Value_type));
2838 
2839  InputIterator _CurrEndIter = _CurrStartIter;
2840  size_t _CurrNumElemsToCopy = _AllocationNumElems;
2841  if (_CurrNumElemsToCopy > _PDestStagingBuf->_Get_num_elems()) {
2842  _CurrNumElemsToCopy = _PDestStagingBuf->_Get_num_elems();
2843  }
2844 
2845  if (_NumElemsToCopyRemaining <= _CurrNumElemsToCopy) {
2846  _CurrNumElemsToCopy = _NumElemsToCopyRemaining;
2847  _CurrEndIter = _SrcLast;
2848  }
2849  else
2850  {
2851  // We need to adjust the _CurrNumElemsToCopy to be a multiple of the
2852  // least common multiple of the destination buffer's element size and sizeof(_Value_type).
2853  _CurrNumElemsToCopy = (_CurrNumElemsToCopy / _AdjustmentRatio) * _AdjustmentRatio;
2854  std::advance(_CurrEndIter, _CurrNumElemsToCopy);
2855  }
2856 
2857  _ASSERTE((_CurrNumElemsToCopy % _AdjustmentRatio) == 0);
2858 
2859  // This would not actually block since we just created this staging buffer or are using
2860  // a cached one that is not in use
2861  _PDestStagingBuf->_Map_buffer(_Write_access, true /* _Wait */);
2862 
2863  // Copy from input to the staging using a raw pointer as OutputIterator
2864  std::copy(_CurrStartIter, _CurrEndIter, stdext::make_unchecked_array_iterator(reinterpret_cast<_Value_type*>(_PDestStagingBuf->_Get_host_ptr())));
2865 
2866  _Ev = _Ev._Add_event(_PDestStagingBuf->_Copy_to_async(_PDestBuf, _CurrNumElemsToCopy, 0, _CurrDstOffset));
2867 
2868  // Adjust the iterators and offsets
2869  _NumElemsToCopyRemaining -= _CurrNumElemsToCopy;
2870  _CurrDstOffset += (_CurrNumElemsToCopy * sizeof(_Value_type)) / _Dst->_Get_elem_size();
2871  _CurrStartIter = _CurrEndIter;
2872 
2873  } while (_NumElemsToCopyRemaining != 0);
2874 
2875  return _Ev;
2876  }
2877  }
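  // Worked example of the staged loop above (illustration only; all sizes are assumptions):
  // copying 600,000 4-byte source elements into a device buffer whose elements are 6 bytes,
  // with a 150,000-element staging chunk. The adjustment ratio is lcm(6, 4) / 4 = 3, so each
  // full chunk copies exactly 150,000 elements, advances the destination offset by
  // 150,000 * 4 / 6 = 100,000 destination elements, and the copy completes in 4 chunks.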
2878 
2879  // The std::advance method is only supported for InputIterators and hence we have a custom implementation
2880  // which forwards to the std::advance if the iterator is an input iterator and uses a loop based advance
2881  // implementation otherwise
2882  template<typename _InputIterator, typename _Distance>
2883  typename std::enable_if<std::is_base_of<std::input_iterator_tag, typename std::iterator_traits<_InputIterator>::iterator_category>::value>::type
2884  _Advance_output_iterator(_InputIterator &_Iter, _Distance _N)
2885  {
2886  std::advance(_Iter, _N);
2887  }
2888 
2889  template<typename _OutputIterator, typename _Distance>
2890  typename std::enable_if<!std::is_base_of<std::input_iterator_tag, typename std::iterator_traits<_OutputIterator>::iterator_category>::value>::type
2891  _Advance_output_iterator(_OutputIterator &_Iter, size_t _N)
2892  {
2893  for (size_t i = 0; i < _N; ++i)
2894  {
2895  _Iter++;
2896  }
2897  }
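  // Illustrative sketch (not part of the runtime): the trait-based dispatch above. A
  // random-access iterator is forwarded to std::advance, while a pure output iterator such
  // as std::back_insert_iterator (whose category does not derive from input_iterator_tag)
  // falls back to the increment loop; the explicit template arguments in the second call
  // mirror the call site in _Copy_impl below. Assumes <iterator> for std::back_inserter.
  inline void _Example_advance_dispatch()
  {
      std::vector<int> _Values(10, 0);
      auto _It = _Values.begin();
      _Advance_output_iterator(_It, 5);                                  // uses std::advance

      std::vector<int> _Sink;
      auto _Out = std::back_inserter(_Sink);
      _Advance_output_iterator<decltype(_Out), size_t>(_Out, 3);         // uses the increment loop
  }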
2898 
2899  template <typename OutputIterator, typename _Value_type>
2900  inline _Event _Copy_impl(_In_ _Buffer *_Src, size_t _Src_offset, size_t _Num_elems,
2901  OutputIterator _DestIter, size_t _Preferred_copy_chunk_num_elems = 0)
2902  {
2903  if ((_Src == NULL) || ((_Src_offset + _Num_elems) > _Src->_Get_num_elems())) {
2904  throw runtime_exception("Failed to copy to buffer.", E_INVALIDARG);
2905  }
2906 
2907  if (_Num_elems == 0) {
2908  return _Event();
2909  }
2910 
2911  size_t _NumElemsToCopy = (_Num_elems * _Src->_Get_elem_size()) / sizeof(_Value_type);
2912 
2913  // If the src is host accessible for read, then we do the copy on
2914  // accelerator(accelerator::cpu_accelerator).default_view
2915  if (_Src->_Is_host_accessible(_Read_access))
2916  {
2917  // Map the _Src buffer
2918  _Event _Ev = _Src->_Map_buffer_async(_Read_access);
2919 
2920  // The _Src is accessible on host. We just need to do a std::copy using a raw pointer as OutputIterator
2921  _Buffer_ptr _PSrcBuf = _Src;
2922  _Ev = _Ev._Add_continuation(std::function<_Event()>([_PSrcBuf, _Src_offset, _DestIter, _NumElemsToCopy]() mutable -> _Event
2923  {
2924  // The _Src is accessible on host. We just need to do a std::copy
2925  const _Value_type *_PFirst = reinterpret_cast<const _Value_type*>(reinterpret_cast<char*>(_PSrcBuf->_Get_host_ptr()) + (_Src_offset * _PSrcBuf->_Get_elem_size()));
2926  std::copy(_PFirst, _PFirst + _NumElemsToCopy, _DestIter);
2927 
2928  return _Event();
2929  }));
2930 
2931  return _Ev;
2932  }
2933  else
2934  {
2935  // The _Src is on the device. We need to copy it out to a temporary staging array
2936  // We may create a staging buffer of size smaller than the copy size and in that case we will
2937  // perform the copy as a series of smaller copies
2938 
2939  _Event _Ev;
2940 
2941  _Buffer_ptr _PSrcBuf = _Src;
2942  size_t _PreferredNumElemsToCopyPerChunk = _Preferred_copy_chunk_num_elems;
2943  if (_PreferredNumElemsToCopyPerChunk == 0) {
2944  // If a preferred copy chunk size was not specified, let's pick one based on the
2945  // size of the copy
2946  _PreferredNumElemsToCopyPerChunk = _Get_preferred_copy_chunk_num_elems(_NumElemsToCopy, sizeof(_Value_type));
2947  }
2948 
2949  size_t _AllocationNumElems = _PreferredNumElemsToCopyPerChunk;
2950  if (_NumElemsToCopy < _AllocationNumElems) {
2951  _AllocationNumElems = _NumElemsToCopy;
2952  }
2953 
2954  _Buffer_ptr _PSrcStagingBuf = _Buffer::_Get_temp_staging_buffer(_Src->_Get_accelerator_view(),
2955  _AllocationNumElems, sizeof(_Value_type));
2956 
2957  _ASSERTE(_PSrcStagingBuf != NULL);
2958  _ASSERTE(_PSrcStagingBuf->_Get_elem_size() == sizeof(_Value_type));
2959 
2960  // The total byte size of a copy chunk must be an integral multiple of both the
2961  // source buffer's element size and sizeof(_Value_type).
2962  size_t _Lcm = _Least_common_multiple(_Src->_Get_elem_size(), sizeof(_Value_type));
2963  size_t _AdjustmentRatio = _Lcm / sizeof(_Value_type);
2964 
2965  size_t _CurrNumElemsToCopy = _AllocationNumElems;
2966  if (_CurrNumElemsToCopy > _PSrcStagingBuf->_Get_num_elems()) {
2967  _CurrNumElemsToCopy = _PSrcStagingBuf->_Get_num_elems();
2968  }
2969  if (_NumElemsToCopy <= _CurrNumElemsToCopy)
2970  {
2971  _CurrNumElemsToCopy = _NumElemsToCopy;
2972  }
2973  else
2974  {
2975  // We need to adjust the _StagingBufNumElems to be a multiple of the
2976  // least common multiple of the source buffer's element size and sizeof(_Value_type).
2977  _CurrNumElemsToCopy = (_CurrNumElemsToCopy / _AdjustmentRatio) * _AdjustmentRatio;
2978  }
2979 
2980  _ASSERTE((_CurrNumElemsToCopy % _AdjustmentRatio) == 0);
2981 
2982  size_t _NumElemsToCopyRemaining = _NumElemsToCopy - _CurrNumElemsToCopy;
2983 
2984  _Ev = _PSrcBuf->_Copy_to_async(_PSrcStagingBuf, (_CurrNumElemsToCopy * sizeof(_Value_type)) / _PSrcBuf->_Get_elem_size(), _Src_offset, 0);
2985 
2986  if (_NumElemsToCopyRemaining != 0)
2987  {
2988  _Ev = _Ev._Add_continuation(std::function<_Event()>([_DestIter, _PSrcBuf, _PSrcStagingBuf,
2989  _CurrNumElemsToCopy, _NumElemsToCopyRemaining,
2990  _Src_offset, _PreferredNumElemsToCopyPerChunk]() mutable -> _Event
2991  {
2992  // Initiate an asynchronous copy of the remaining part so that this part of the copy
2993  // makes progress while we complete the copying of the first part
2994  size_t _CurrSrcOffset = _Src_offset + ((_CurrNumElemsToCopy * sizeof(_Value_type)) / _PSrcBuf->_Get_elem_size());
2995  OutputIterator _CurrDestIter = _DestIter;
2996  _Advance_output_iterator<decltype(_CurrDestIter), size_t>(_CurrDestIter, _CurrNumElemsToCopy);
2997  _Event _Ret_ev = _Copy_impl<OutputIterator, _Value_type>(_PSrcBuf._Get_ptr(), _CurrSrcOffset,
2998  (_NumElemsToCopyRemaining * sizeof(_Value_type)) / _PSrcBuf->_Get_elem_size(),
2999  _CurrDestIter, _PreferredNumElemsToCopyPerChunk);
3000 
3001  // Now copy the data from staging buffer to the destination
3002  _Value_type *_PFirst = reinterpret_cast<_Value_type*>(_PSrcStagingBuf->_Get_host_ptr());
3003  std::copy(_PFirst, _PFirst + _CurrNumElemsToCopy, _DestIter);
3004  return _Ret_ev;
3005  }));
3006  }
3007  else
3008  {
3009  _Ev = _Ev._Add_continuation(std::function<_Event()>([_DestIter, _PSrcStagingBuf, _CurrNumElemsToCopy]() mutable -> _Event
3010  {
3011  _Value_type *_PFirst = reinterpret_cast<_Value_type*>(_PSrcStagingBuf->_Get_host_ptr());
3012  std::copy(_PFirst, _PFirst + _CurrNumElemsToCopy, _DestIter);
3013  return _Event();
3014  }));
3015  }
3016 
3017  return _Ev;
3018  }
3019  }
3020 
3021  // Structured copy between buffers across AVs
3022  _AMPIMP _Event __cdecl _Copy_impl(_In_ _Buffer *_Src, _View_shape_ptr _Src_shape, _Out_ _Buffer * _Dst, _View_shape_ptr _Dst_shape);
3023 
3024  struct _Array_copy_desc
3025  {
3026  _Array_copy_desc(
3027  const unsigned int _Rank,
3028  const unsigned int _Src_linear_offset,
3029  const unsigned int * _Src_extents,
3030  const unsigned int * _Src_copy_offset,
3031  const unsigned int _Dst_linear_offset,
3032  const unsigned int * _Dst_extents,
3033  const unsigned int * _Dst_copy_offset,
3034  const unsigned int * _Copy_extents)
3035  {
3036  this->_Rank = _Rank;
3037 
3038  this->_Src_linear_offset = _Src_linear_offset;
3039  this->_Src_extents.assign( _Src_extents, _Src_extents + _Rank);
3040  this->_Src_copy_offset.assign( _Src_copy_offset, _Src_copy_offset + _Rank);
3041 
3042  this->_Dst_linear_offset = _Dst_linear_offset;
3043  this->_Dst_extents.assign( _Dst_extents, _Dst_extents + _Rank);
3044  this->_Dst_copy_offset.assign( _Dst_copy_offset, _Dst_copy_offset + _Rank);
3045 
3046  this->_Copy_extents.assign( _Copy_extents, _Copy_extents + _Rank);
3047  }
3048 
3050 
3051  unsigned int _Rank;
3052 
3053  // Shape of source
3054  unsigned int _Src_linear_offset;
3055  std::vector<unsigned int> _Src_extents;
3056  std::vector<unsigned int> _Src_copy_offset;
3057 
3058  // Shape of destination
3059  unsigned int _Dst_linear_offset;
3060  std::vector<unsigned int> _Dst_extents;
3061  std::vector<unsigned int> _Dst_copy_offset;
3062 
3063  // Shape of copy region
3064  std::vector<unsigned int> _Copy_extents;
3065  };
3066 
3067  // Declaration
3068  _AMPIMP HRESULT __cdecl _Recursive_array_copy(const _Array_copy_desc& _Desc,
3069  unsigned int _Native_copy_rank,
3070  std::function<HRESULT(const _Array_copy_desc &_Reduced)> _Native_copy_func);
3071 
3072  _AMPIMP std::pair<accelerator_view, accelerator_view> __cdecl _Get_src_dest_accelerator_view(_In_opt_ const _Buffer_descriptor *_SrcBuffDescPtr,
3073  _In_opt_ const _Buffer_descriptor *_DestBuffDescPtr);
3074 
3075  // Iterator based copy function
3076  template<typename _InputInterator, typename _OutputIterator>
3077  inline _Event _Copy_impl_iter(_InputInterator _SrcFirst, _InputInterator _SrcLast, _OutputIterator _DstFirst)
3078  {
3079  std::copy(_SrcFirst, _SrcLast, _DstFirst);
3080  return _Event();
3081  }
3082 
3083  // Iterator based copy function
3084  template <typename InputIterator, typename _Value_type>
3085  inline _Event _Copy_impl(InputIterator _SrcFirst, _View_shape_ptr _Src_shape, _Inout_ _Buffer * _Dst, _View_shape_ptr _Dst_shape)
3086  {
3087  _ASSERTE(_Dst != NULL);
3088  _ASSERTE(_Src_shape != NULL);
3089  _ASSERTE(_Dst_shape != NULL);
3090 
3091  if (_Src_shape->_Is_projection()) {
3092  _Src_shape = _Src_shape->_Get_reduced_shape_for_copy();
3093  }
3094 
3095  if (_Dst_shape->_Is_projection()) {
3096  _Dst_shape = _Dst_shape->_Get_reduced_shape_for_copy();
3097  }
3098 
3099  _ASSERTE(_Src_shape->_Get_rank() == _Dst_shape->_Get_rank());
3100 
3101  _ASSERTE(_View_shape::_Compare_extent_with_elem_size(_Src_shape->_Get_rank(), _Src_shape->_Get_view_extent(),
3102  sizeof(_Value_type), _Dst_shape->_Get_view_extent(), _Dst->_Get_elem_size()));
3103 
3104  if (_Dst->_Is_host_accessible(_Write_access))
3105  {
3106  // The destination buffer is accessible on the host. Map the _Dst buffer
3107  _Event _Ev = _Dst->_Map_buffer_async(_Write_access);
3108  _Buffer_ptr _PDestBuf = _Dst;
3109  return _Ev._Add_continuation(std::function<_Event()>([_SrcFirst, _Src_shape, _PDestBuf, _Dst_shape]() mutable -> _Event {
3110  return _Copy_impl_iter(_SrcFirst, _Src_shape, stdext::make_unchecked_array_iterator(reinterpret_cast<_Value_type*>(_PDestBuf->_Get_host_ptr())),
3111  _Create_reinterpreted_shape(_Dst_shape, _PDestBuf->_Get_elem_size(), sizeof(_Value_type)));
3112  }));
3113  }
3114  else
3115  {
3116  // The dest buffer is not accessible on the host. Let's create a temporary
3117  // staging buffer on the destination buffer's accelerator_view
3118  _Buffer_ptr _PTempStagingBuf = _Buffer::_Create_stage_buffer(_Dst->_Get_accelerator_view(), accelerator(accelerator::cpu_accelerator).default_view,
3119  _Src_shape->_Get_view_size(), sizeof(_Value_type), true /* _Is_temp */);
3120 
3121  _PTempStagingBuf->_Map_buffer(_Write_access, true /* _Wait */);
3122  _Value_type *_Dst_ptr = reinterpret_cast<_Value_type*>(_PTempStagingBuf->_Get_host_ptr());
3123  _Event _Ev = _Copy_impl_iter(_SrcFirst, _Src_shape, stdext::make_unchecked_array_iterator(_Dst_ptr), _Src_shape);
3124 
3125  // Now copy from the staging buffer to the destination buffer
3126  _Buffer_ptr _PDestBuf = _Dst;
3127  return _Ev._Add_continuation(std::function<_Event()>([_PTempStagingBuf, _Src_shape, _PDestBuf, _Dst_shape]() mutable -> _Event {
3128  return _Copy_impl(_PTempStagingBuf, _Src_shape, _PDestBuf, _Dst_shape);
3129  }));
3130  }
3131  }
3132 
3133  template <typename OutputIterator, typename _Value_type>
3134  inline _Event _Copy_impl(_In_ _Buffer *_Src, _View_shape_ptr _Src_shape, OutputIterator _DestIter, _View_shape_ptr _Dst_shape)
3135  {
3136  _ASSERTE(_Src != NULL);
3137  _ASSERTE(_Src_shape != NULL);
3138  _ASSERTE(_Dst_shape != NULL);
3139 
3140  if (_Src_shape->_Is_projection()) {
3141  _Src_shape = _Src_shape->_Get_reduced_shape_for_copy();
3142  }
3143 
3144  if (_Dst_shape->_Is_projection()) {
3145  _Dst_shape = _Dst_shape->_Get_reduced_shape_for_copy();
3146  }
3147 
3148  _ASSERTE(_Src_shape->_Get_rank() == _Dst_shape->_Get_rank());
3149 
3150  _ASSERTE(_View_shape::_Compare_extent_with_elem_size(_Src_shape->_Get_rank(), _Src_shape->_Get_view_extent(),
3151  _Src->_Get_elem_size(), _Dst_shape->_Get_view_extent(), sizeof(_Value_type)));
3152 
3153  if (_Src->_Is_host_accessible(_Read_access))
3154  {
3155  // The source buffer is accessible on the host. Map the _Src buffer
3156  _Event _Ev = _Src->_Map_buffer_async(_Read_access);
3157 
3158  _Buffer_ptr _PSrcBuf = _Src;
3159  return _Ev._Add_continuation(std::function<_Event()>([_PSrcBuf, _Src_shape, _DestIter, _Dst_shape]() mutable -> _Event {
3160  return _Copy_impl_iter(reinterpret_cast<_Value_type*>(_PSrcBuf->_Get_host_ptr()),
3161  _Create_reinterpreted_shape(_Src_shape, _PSrcBuf->_Get_elem_size(), sizeof(_Value_type)),
3162  _DestIter, _Dst_shape);
3163  }));
3164  }
3165  else
3166  {
3167  // The source buffer is not accessible on the host. Let's create a temporary
3168  // staging buffer on the source buffer's accelerator_view and initiate a copy
3169  // from the source buffer to the temporary staging buffer
3170  _Buffer_ptr _PTempStagingBuf = _Buffer::_Create_stage_buffer(_Src->_Get_accelerator_view(), accelerator(accelerator::cpu_accelerator).default_view,
3171  _Dst_shape->_Get_view_size(), sizeof(_Value_type), true);
3172 
3173  _Event _Ev = _Src->_Copy_to_async(_PTempStagingBuf, _Src_shape, _Dst_shape);
3174  return _Ev._Add_continuation(std::function<_Event()>([_PTempStagingBuf, _Dst_shape, _DestIter]() mutable -> _Event {
3175  return _Copy_impl_iter(reinterpret_cast<_Value_type*>(_PTempStagingBuf->_Get_host_ptr()),
3176  _Dst_shape, _DestIter, _Dst_shape);
3177  }));
3178  }
3179  }
3180 
3181  // Iterator based structured copy function
3182  template<typename _InputInterator, typename _OutputIterator>
3183  inline _Event _Copy_impl_iter(_InputInterator _SrcIter, _View_shape_ptr _Src_shape,
3184  _OutputIterator _DstIter, _View_shape_ptr _Dst_shape)
3185  {
3186  if (_Src_shape->_Is_projection()) {
3187  _Src_shape = _Src_shape->_Get_reduced_shape_for_copy();
3188  }
3189 
3190  if (_Dst_shape->_Is_projection()) {
3191  _Dst_shape = _Dst_shape->_Get_reduced_shape_for_copy();
3192  }
3193 
3194  _ASSERTE(_Src_shape->_Get_rank() == _Dst_shape->_Get_rank());
3195  _ASSERTE(_View_shape::_Compare_extent(_Src_shape->_Get_rank(), _Src_shape->_Get_view_extent(), _Dst_shape->_Get_view_extent()));
3196 
3197  // If both the _Src_shape and _Dst_shape are linear we can be more efficient
3198  unsigned int _Src_linear_offset, _Src_linear_size, _Dst_linear_offset, _Dst_linear_size;
3199  if (_Src_shape->_Is_view_linear(_Src_linear_offset, _Src_linear_size) &&
3200  _Dst_shape->_Is_view_linear(_Dst_linear_offset, _Dst_linear_size))
3201  {
3202  _ASSERTE(_Src_linear_size == _Dst_linear_size);
3203 
3204  // These iterators might not be contiguous, so we use std::advance
3205  std::advance(_SrcIter, _Src_linear_offset);
3206  auto _SrcLast = _SrcIter;
3207  std::advance(_SrcLast, _Src_linear_size);
3208  std::advance(_DstIter, _Dst_linear_offset);
3209 
3210  return _Copy_impl_iter(_SrcIter, _SrcLast, _DstIter);
3211  }
3212 
3213  std::vector<unsigned int> _Src_extent(_Src_shape->_Get_rank());
3214  std::vector<unsigned int> _Src_offset(_Src_shape->_Get_rank());
3215  std::vector<unsigned int> _Dst_extent(_Dst_shape->_Get_rank());
3216  std::vector<unsigned int> _Dst_offset(_Dst_shape->_Get_rank());
3217  std::vector<unsigned int> _Copy_extent(_Src_shape->_Get_rank());
3218 
3219  for (size_t i = 0; i < _Src_shape->_Get_rank(); ++i) {
3220  _Src_extent[i] = _Src_shape->_Get_base_extent()[i];
3221  _Src_offset[i] = _Src_shape->_Get_view_offset()[i];
3222  _Dst_extent[i] = _Dst_shape->_Get_base_extent()[i];
3223  _Dst_offset[i] = _Dst_shape->_Get_view_offset()[i];
3224  _Copy_extent[i] = _Src_shape->_Get_view_extent()[i];
3225  }
3226 
3227  _Array_copy_desc _Desc(
3228  _Src_shape->_Get_rank(),
3229  _Src_shape->_Get_linear_offset(),
3230  _Src_extent.data(),
3231  _Src_offset.data(),
3232  _Dst_shape->_Get_linear_offset(),
3233  _Dst_extent.data(),
3234  _Dst_offset.data(),
3235  _Copy_extent.data());
3236 
3237  // Note: Capturing the shape pointers would be incorrect; they are only valid for setting up the call
3238  // and might be deleted right after this call completes.
3239  HRESULT hr = _Recursive_array_copy(_Desc, 1, [_SrcIter, _DstIter](const _Array_copy_desc &_Reduced) -> HRESULT {
3240 
3241  auto _SrcFirst = _SrcIter;
3242  auto _DstFirst = _DstIter;
3243 
3244  std::advance(_DstFirst, _Reduced._Dst_linear_offset + _Reduced._Dst_copy_offset[0]);
3245  std::advance(_SrcFirst, _Reduced._Src_linear_offset + _Reduced._Src_copy_offset[0]);
3246  auto _SrcLast = _SrcFirst;
3247  std::advance(_SrcLast, _Reduced._Copy_extents[0]);
3248 
3249  std::copy(_SrcFirst, _SrcLast, _DstFirst);
3250 
3251  return S_OK;
3252  });
3253 
3254  if (FAILED(hr)) {
3255  throw Concurrency::runtime_exception("Failed to copy between buffers", E_FAIL);
3256  }
3257 
3258  return _Event();
3259  }
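  // Worked example (illustration only): for two rank-2 views that are both linear in memory,
  // say a full 4 x 8 view over a 4 x 8 base extent on each side, the fast path above reduces
  // to a single std::copy of 32 elements starting at the two linear offsets. If either view
  // is a strided sub-rectangle instead, _Recursive_array_copy reduces the copy to rank 1 and
  // the lambda issues one std::copy per contiguous row segment.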
3260 
3261  // A ubiquitous buffer that provides access to the underlying data
3262  // on any accelerator_view
3263  class _Ubiquitous_buffer : public _Reference_counter
3264  {
3265  friend _Event _Get_access_async(const _View_key _Key, accelerator_view _Av, _Access_mode _Mode, _Buffer_ptr &_Buf_ptr);
3266  friend _AMPIMP accelerator_view __cdecl _Select_copy_src_accelerator_view(_In_ _View_key _Src_view_key, const accelerator_view &_Dest_accelerator_view);
3267  friend struct _DPC_call_handle;
3268 
3269  public:
3270 
3271  _AMPIMP static _Ret_ _Ubiquitous_buffer * __cdecl _Create_ubiquitous_buffer(size_t _Num_elems, size_t _Elem_size);
3272 
3273  _AMPIMP static _Ret_ _Ubiquitous_buffer * __cdecl _Create_ubiquitous_buffer(_Buffer_ptr _Master_buffer);
3274 
3275  // Register a new view on top of this _Ubiquitous_buffer
3276  _AMPIMP void _Register_view(_In_ _View_key _Key, accelerator_view _Cpu_av, _View_shape_ptr _Shape);
3277 
3278  // Register a copy of an existing view registered with this _Ubiquitous_buffer
3279  _AMPIMP void _Register_view_copy(_In_ _View_key _New_view_key, _In_ _View_key _Existing_view_key);
3280 
3281  // Unregister a view currently registered with this _Ubiquitous_buffer
3282  _AMPIMP void _Unregister_view(_In_ _View_key _Key);
3283 
3284  // Obtain a specified mode of access to the specified view on the specified target
3285  // accelerator_view. This method also serves the purpose of determining the
3286  // amount of data copy expected to happen as part of this _Get_access request
3287  // without actually performing the copies or state updates in the _Ubiquitous_buffer. This
3288  // is used for reporting the implicit data copies that happen when accessing array_views
3289  // in C++ AMP ETW events
3290  _AMPIMP _Event _Get_access_async(_In_ _View_key _Key, _Accelerator_view_impl_ptr _Av_view_impl_ptr,
3291  _Access_mode _Mode, _Buffer_ptr &_Buf_ptr,
3292  _Inout_opt_ ULONGLONG *_Sync_size = nullptr);
3293 
3294  // Discard the content underlying this view
3295  _AMPIMP void _Discard(_In_ _View_key _Key);
3296 
3297  // This method does not synchronize the copies. It should not be used for getting
3298  // data access, but only to query the underlying buffer's properties
3299  _AMPIMP _Buffer_ptr _Get_master_buffer() const;
3300 
3301  _AMPIMP accelerator_view _Get_master_accelerator_view() const;
3302 
3303  _AMPIMP _View_shape_ptr _Get_view_shape(_In_ _View_key _Key);
3304 
3305  _Ret_ _Accelerator_view_impl* _Get_master_accelerator_view_impl() const
3306  {
3307  return _M_master_av;
3308  }
3309 
3310  size_t _Get_master_buffer_elem_size() const
3311  {
3312  return _M_master_buffer_elem_size;
3313  }
3314 
3315  size_t _Get_master_buffer_num_elems() const
3316  {
3317  return _M_master_buffer_num_elems;
3318  }
3319 
3320  bool _Has_data_source() const
3321  {
3322  return _M_has_data_source;
3323  }
3324 
3325  private:
3326 
3327  // The _Ubiquitous_buffer constructors are private to force construction through the static
3328  // _Create_ubiquitous_buffer method to ensure the object is allocated in the runtime
3329  _Ubiquitous_buffer(size_t _Num_elems, size_t _Elem_size);
3330  _Ubiquitous_buffer(_In_ _Buffer* _Master_buffer);
3331 
3332  // Private destructor to force deletion through _Release
3333  ~_Ubiquitous_buffer();
3334 
3335  // No default constructor, copy constructor, or assignment operator
3336  _Ubiquitous_buffer();
3337  _Ubiquitous_buffer(const _Ubiquitous_buffer &rhs);
3338  _Ubiquitous_buffer &operator=(const _Ubiquitous_buffer &rhs);
3339 
3340  // Helper methods
3341 
3342  // Get access to a buffer on a specified accelerator for a specified pre-registered view.
3343  // If _Sync_size parameter is not null, then function calculates number of bytes that we
3344  // need to synchronize to get desired access.
3345  _AMPIMP _Event _Get_access_async(_In_ _View_key _Key, accelerator_view _Av, _Access_mode _Mode,
3346  _Buffer_ptr &_Buf_ptr, _Inout_opt_ ULONGLONG *_Sync_size = NULL);
3347 
3348  // Commit a view to the master buffer if needed. When the _Sync_size parameter is non-null
3349  // this method just returns the amount of data to be copied as part of the commit, without
3350  // actually performing the commit
3351  _Event _Commit_view_async(_In_ _View_info *_Info, _Inout_ ULONGLONG *_Sync_size = nullptr);
3352 
3353  // Get the _Buffer_ptr corresponding to a specified accelerator_view. When the
3354  // _Create parameter is true, it creates a new _Buffer if one does not already exist
3355  // for that accelerator_view
3356  _Ret_ _Buffer* _Get_buffer(_In_ _Accelerator_view_impl* _Av, bool _Create = true);
3357 
3358  // Sets a new access mode for the specified view
3359  void _Set_new_access_mode(_Inout_ _View_info *_Info, _Access_mode _New_mode);
3360 
3361  // Unsets the discard flag from the specified view and all other
3362  // overlapping views
3363  void _Unset_discard_flag(_Inout_ _View_info *_Info);
3364 
3365  // Determines whether the data underlying the specified view has been discarded
3366  // based on whether a subsuming view has the discard flag set.
3367  bool _Should_discard(const _View_info *_Info) const;
3368 
3369  // Does this view have exclusive data which is not discarded,
3370  // is not on the master accelerator_view, and for which there is no other view
3371  // that subsumes this view and is marked dirty
3372  bool _Has_exclusive_data(const _View_info *_Info) const;
3373 
3374  // Based on the current state of overlapping views in the _Ubiquitous_buffer
3375  // does the specified view require a data update on the target accelerator_view
3376  // to fulfil an access request
3377  bool _Requires_update_on_target_accelerator_view(const _View_info *_Info,
3378  _Access_mode _Requested_mode,
3379  _In_ _Accelerator_view_impl* _Target_acclerator_view) const;
3380 
3381  // This method iterates over all views in the specified commit list
3382  // and flags them as "commit not needed" if that view is subsumed by another view present in the
3383  // commit list
3384  static void _Flag_redundant_commits(std::vector<std::pair<_View_info*, bool>> &_Commit_list);
3385 
3386  // This method returns the list of accelerator_views where the specified view already has
3387  // a valid cached copy of the data and getting read access would not incur any data movement.
3388  // The _Can_access_anywhere parameter is an output parameter used to indicate to the
3389  // caller that the specified view can be accessed on any accelerator_view without incurring
3390  // any data movement. This is true when there are no modified overlapping views that require
3391  // synchronization and the specified view has the discard_data flag set.
3392  // This method is used for determining the source accelerator_view for copy and p_f_e operations
3393  // involving array_views
3394  _Accelerator_view_unordered_set _Get_caching_info(_In_ _View_key _Key, _Out_opt_ bool *_Can_access_anywhere = NULL);
3395 
3396  _Accelerator_view_unordered_set _Get_caching_info_impl(_In_ _View_key _Key, _Out_opt_ bool *_Can_access_anywhere);
3397 
3398  _Ret_ _Accelerator_view_impl* _Determine_alternate_target_accelerator_view(_In_ _View_key _Key,
3399  _In_ _Accelerator_view_impl* _Original_av,
3400  _Access_mode _Mode);
3401 
3402  private:
3403 
3404  // Private data
3405 
3406  // The master accelerator_view for this _Ubiquitous_buffer
3407  // which is specified at construction time
3408  _Accelerator_view_impl_ptr _M_master_av;
3409 
3410  // The master _Buffer corresponding to this _Ubiquitous_buffer
3411  // which is specified at construction time
3412  _Buffer_ptr _M_master_buffer;
3413 
3414  // The size of each element of the master buffer
3415  size_t _M_master_buffer_elem_size;
3416 
3417  // The number of elements in the master buffer
3418  size_t _M_master_buffer_num_elems;
3419 
3420  // Indicates if this ubiquitous buffer has an underlying data source
3421  bool _M_has_data_source;
3422 
3423  // A map of pre-created _Buffers corresponding to different
3424  // accelerator_views where the _Ubiquitous_buffer has already been
3425  // accessed
3426  std::map<_Accelerator_view_impl_ptr, _Buffer_ptr> _M_buffer_map;
3427 
3428  // A mapping between all registered view keys in this _Ubiquitous_buffer
3429  // to their corresponding _View_info
3430  std::unordered_map<_View_key, _View_info*> _M_view_map;
3431 
3432  // Set of distinct views of this buffer. As multiple copies of the same
3433  // view may have been registered for this _Ubiquitous_buffer, this set
3434  // maintains the set of distinct views which really matter for the
3435  // caching protocol. Also, note that some view_info may not have any live registered views
3436  // and hence does not exist in the _M_view_map, but may exist here since
3437  // it has uncommitted data which needs to be considered as part of the cache
3438  // coherence protocol to prevent modifications underlying this view from being lost
3439  std::unordered_set<_View_info*> _M_view_info_set;
3440 
3441  // Critical section object to protect the cache directory
3443  };
3444 
3445  // Class defines functions for interoperability with D3D
3446  class _D3D_interop
3447  {
3448  public:
3449  _AMPIMP static _Ret_ IUnknown * __cdecl _Get_D3D_buffer(_In_ _Buffer *_Buffer_ptr);
3450  _AMPIMP static _Ret_ IUnknown * __cdecl _Get_D3D_texture(_In_ _Texture *_Texture_ptr);
3451  _AMPIMP static _Ret_ void * __cdecl _Get_D3D_sampler_data_ptr(_In_ IUnknown *_D3D_sampler);
3452  _AMPIMP static void __cdecl _Release_D3D_sampler_data_ptr(_In_ void *_Sampler_data_ptr);
3453  _AMPIMP static _Ret_ IUnknown * __cdecl _Get_D3D_sampler(const Concurrency::accelerator_view &_Av, _In_ _Sampler *_Sampler_ptr);
3454  };
3455 
3456  inline
3457  _Event _Get_access_async(const _View_key _Key, accelerator_view _Av, _Access_mode _Mode, _Buffer_ptr &_Buf_ptr)
3458  {
3459  return _Key->_Get_buffer_ptr()->_Get_access_async(_Key->_Get_view_key(), _Av, _Mode, _Buf_ptr);
3460  }
3461 
3462  inline
3463  _Ret_ _View_shape_ptr _Get_buffer_view_shape(const _Buffer_descriptor& _Descriptor)
3464  {
3465  return _Descriptor._Get_buffer_ptr()->_Get_view_shape(_Descriptor._Get_view_key());
3466  }
3467 
3468  inline
3469  bool _Is_cpu_accelerator(const accelerator& _Accl)
3470  {
3471  return (_Accl.device_path == accelerator::cpu_accelerator);
3472  }
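  // Illustrative sketch (not part of the runtime): the helper above is how the runtime
  // decides whether data movement can stay entirely on the host. A caller could make the
  // same check for the process-wide default accelerator using only the public
  // Concurrency::accelerator API, as below.
  inline bool _Example_default_accelerator_is_cpu()
  {
      accelerator _Default_accl;                       // default-constructed: the default accelerator
      return _Is_cpu_accelerator(_Default_accl);       // true when the default device is the CPU fallback
  }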
3473 
3474 } // namespace Concurrency::details
3475 
3480 class accelerator_view_removed : public runtime_exception
3481 {
3482 public:
3493  _AMPIMP explicit accelerator_view_removed(const char * _Message, HRESULT _View_removed_reason) throw();
3494 
3501  _AMPIMP explicit accelerator_view_removed(HRESULT _View_removed_reason) throw();
3502 
3509  _AMPIMP HRESULT get_view_removed_reason() const throw();
3510 
3511 private:
3512 
3514 }; // class accelerator_view_removed
3515 
3520 class invalid_compute_domain : public runtime_exception
3521 {
3522 public:
3529  _AMPIMP explicit invalid_compute_domain(const char * _Message) throw();
3530 
3534  _AMPIMP invalid_compute_domain() throw();
3535 }; // class invalid_compute_domain
3536 
3540 class unsupported_feature : public runtime_exception
3541 {
3542 public:
3549  _AMPIMP explicit unsupported_feature(const char * _Message) throw();
3550 
3554  _AMPIMP unsupported_feature() throw();
3555 }; // class unsupported_feature
3556 
3557 } // namespace Concurrency
3558 
3559 // =+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+
3560 //
3561 // Compiler/Runtime Interface
3562 //
3563 // =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
3564 
3565 #define HELPERAPI __cdecl
3566 
3567 using namespace Concurrency::details;
3568 
3569 extern "C" {
3570 
3571  // This structure is used for storing information about resources required by the kernel.
3572  enum _Resource_kind
3573  {
3574  RESOURCE_BUFFER,
3575  RESOURCE_TEXTURE,
3576  RESOURCE_SAMPLER,
3577  };
3578 
3579  struct _Device_resource_info
3580  {
3581  _Resource_kind _M_resource_kind; // buffer, texture, or sampler
3582 
3583  void * _M_desc; // Pointer to the _Buffer_descriptor/_Texture_descriptor/_Sampler_descriptor instance
3584  // which underlies the device resource
3585 
3586  _Access_mode _M_formal_access_mode; // scalar: read-only
3587  // const scalar ref: read-only
3588  // scalar ref: ReadWrite
3589  // array: ReadWrite
3590  // const array: ReadOnly
3592 
3593  BOOL _Is_buffer() const
3594  {
3595  return (_M_resource_kind == RESOURCE_BUFFER);
3596  }
3597 
3598  BOOL _Is_texture() const
3599  {
3600  return (_M_resource_kind == RESOURCE_TEXTURE);
3601  }
3602 
3603  BOOL _Is_sampler() const
3604  {
3605  return (_M_resource_kind == RESOURCE_SAMPLER);
3606  }
3607 
3608  _Ret_ _Buffer_descriptor * _Get_buffer_desc() const
3609  {
3610  _ASSERTE(_Is_buffer());
3611  return reinterpret_cast<_Buffer_descriptor *>(_M_desc);
3612  }
3613 
3614  _Ret_ _Texture_descriptor * _Get_texture_desc() const
3615  {
3616  _ASSERTE(_Is_texture());
3617  return reinterpret_cast<_Texture_descriptor *>(_M_desc);
3618  }
3619 
3620  _Ret_ _Sampler_descriptor * _Get_sampler_desc() const
3621  {
3622  _ASSERTE(_Is_sampler());
3623  return reinterpret_cast<_Sampler_descriptor *>(_M_desc);
3624  }
3625 
3626  _Ret_ void * _Get_resource_ptr() const
3627  {
3628  if (_Is_buffer())
3629  {
3630  _Ubiquitous_buffer * _Tmp = _Get_buffer_desc()->_Get_buffer_ptr();
3631  return reinterpret_cast<void *>(_Tmp);
3632  }
3633  else if (_Is_texture())
3634  {
3635  _Texture * _Tmp = _Get_texture_desc()->_Get_texture_ptr();
3636  return reinterpret_cast<void *>(_Tmp);
3637  }
3638  else
3639  {
3640  _ASSERTE(_Is_sampler());
3641  _Sampler * _Tmp = _Get_sampler_desc()->_Get_sampler_ptr();
3642  return reinterpret_cast<void *>(_Tmp);
3643  }
3644  }
3645  };
3646 
3647  // This structure is used for storing information about the const buffers
3648  struct _Device_const_buffer_info
3649  {
3650  void * _M_data; // Pointer to the host data to initialize the
3651  // constant buffer with
3652 
3653  size_t _M_const_buf_size; // Size of the const buffer in bytes
3654 
3655  unsigned int _M_is_debug_data; // Is this debug data which will be
3656  // initialized by the runtime. 0 (false), 1 (true)
3657  };
3658 }
3659 
3660 namespace Concurrency
3661 {
3662 namespace details
3663 {
3665  {
3666  NON_ALIASED_SHADER = 0, // slot 0
3667  ALIASED_SHADER = 1, // slot 1
3668  NUM_SHADER_VERSIONS
3669  };
3670 
3671  struct _DPC_call_handle
3672  {
3673  _Accelerator_view_impl *_M_rv;
3675 
3676  // Info about the kernel function arguments
3681 
3682  // Info about the host buffer created corresponding to the const buffer
3685 
3687 
3688  // Kernel funcs
3689  _DPC_shader_blob * _M_shader_blobs[NUM_SHADER_VERSIONS];
3690 
3691  // Compute domain info
3693  unsigned int _M_compute_rank;
3694  unsigned int * _M_grid_extents;
3695 
3696  // Kernel dispatch info
3697  unsigned int _M_groupCountX;
3698  unsigned int _M_groupCountY;
3699  unsigned int _M_groupCountZ;
3700 
3701  // The shape of the group
3702  unsigned int _M_groupExtentX;
3703  unsigned int _M_groupExtentY;
3704  unsigned int _M_groupExtentZ;
3705 
3706  _DPC_call_handle(const accelerator_view &_Accelerator_view)
3707  {
3708  if (!_Accelerator_view.is_auto_selection) {
3709  _M_rv = _Get_accelerator_view_impl_ptr(_Accelerator_view);
3710  }
3711  else {
3712  _M_rv = NULL;
3713  }
3714 
3715  _M_is_explicit_target_acclview = false;
3716  if (_M_rv != NULL) {
3717  _M_is_explicit_target_acclview = true;
3718  }
3719 
3720  _M_device_resource_info = NULL;
3721  _M_num_resources = 0;
3722  _M_num_writable_buffers = 0;
3723  _M_num_samplers = 0;
3724 
3725  _M_const_buffer_info = NULL;
3726  _M_num_const_buffers = 0;
3727 
3728  _M_RW_aliasing = false;
3729 
3730  for (size_t _I = 0; _I < NUM_SHADER_VERSIONS; _I++)
3731  {
3732  _M_shader_blobs[_I] = NULL;
3733  }
3734 
3735  _M_is_flat_model = 0;
3736  _M_compute_rank = 0;
3737  _M_grid_extents = NULL;
3738 
3739  _M_groupCountX = 0;
3740  _M_groupCountY = 0;
3741  _M_groupCountZ = 0;
3742 
3743  _M_groupExtentX = 0;
3744  _M_groupExtentY = 0;
3745  _M_groupExtentZ = 0;
3746  }
3747 
3748  ~_DPC_call_handle()
3749  {
3750  if (_M_grid_extents) {
3751  delete [] _M_grid_extents;
3752  }
3753  }
3754 
3755  bool _Is_buffer_aliased(_In_ void *_Buffer_ptr)
3756  {
3757  return ((_M_aliased_buffer_set != nullptr) && (_M_aliased_buffer_set->find(_Buffer_ptr) != _M_aliased_buffer_set->end()));
3758  }
3759 
3760  bool _Is_buffer_unaccessed(size_t _Buffer_idx)
3761  {
3762  return ((_M_is_device_buffer_unaccessed != nullptr) && _M_is_device_buffer_unaccessed->operator[](_Buffer_idx));
3763  }
3764 
3765  void _Set_buffer_unaccessed(size_t _Buffer_idx)
3766  {
3767  if (_M_is_device_buffer_unaccessed == nullptr) {
3768  _M_is_device_buffer_unaccessed = std::unique_ptr<std::vector<bool>>(new std::vector<bool>(_M_num_resources, false));
3769  }
3770 
3771  _M_is_device_buffer_unaccessed->operator[](_Buffer_idx) = true;
3772  }
3773 
3774  const int* _Get_redirect_indices() const
3775  {
3776  if (!_M_RW_aliasing) {
3777  return nullptr;
3778  }
3779 
3780  _ASSERTE(_M_Redirect_indices != nullptr);
3781 
3782  return _M_Redirect_indices->data();
3783  }
3784 
3785  void _Check_buffer_aliasing();
3786  void _Update_buffer_rw_property();
3787  void _Setup_aliasing_redirection_indices();
3788  void _Select_accelerator_view();
3789  void _Verify_buffers_against_accelerator_view();
3790 
3791  private:
3792  std::unique_ptr<std::unordered_set<void*>> _M_aliased_buffer_set;
3793  std::unique_ptr<std::vector<bool>> _M_is_device_buffer_unaccessed;
3794  // Info about read-write aliasing
3795  std::unique_ptr<std::vector<int>> _M_Redirect_indices;
3796  };
3797 
3798  // This structure is used for passing the scheduling
3799  // info to parallel_for_each; it is handed back
3800  // to the compiler-runtime interface methods by the front end
3801  struct _Host_Scheduling_info
3802  {
3803  // The accelerator view to invoke a parallel_for_each on
3804  accelerator_view _M_accelerator_view;
3805  };
3806 
3807 } // namespace Concurrency::details
3808 
3809 
3820 _AMPIMP void __cdecl amp_uninitialize();
3821 
3822 } // namespace Concurrency
3823 
3824 extern "C" {
3825 
3826  // Return a compiler helper handle.
3828 
3829  // Destroy the call handle
3831 
3832  _AMPIMP void HELPERAPI __dpc_set_device_resource_info(_In_ _DPC_call_handle * _Handle, _In_ _Device_resource_info * _DeviceResourceInfo, size_t _NumResources) throw(...);
3833 
3834  // Set const buffer info.
3835  _AMPIMP void HELPERAPI __dpc_set_const_buffer_info(_In_ _DPC_call_handle * _Handle, _In_ _Device_const_buffer_info * _DeviceConstBufferInfo, size_t _NumConstBuffers) throw(...);
3836 
3837  // Set the kernel shader info
3839  _Inout_ void ** _ShaderBlobs) throw(...);
3840  // Set kernel dispatch info
3842  unsigned int _ComputeRank,
3843  _In_ int * _Extents,
3844  unsigned int _GroupRank,
3845  const unsigned int * _GroupExtents,
3846  unsigned int & _GroupCountX,
3847  unsigned int & _GroupCountY,
3848  unsigned int & _GroupCountZ) throw(...);
3849 
3850  // Dispatch the kernel
3851  _AMPIMP void HELPERAPI __dpc_dispatch_kernel(_In_ _DPC_call_handle * _Handle) throw(...);
3852 
3853 #ifdef _DEBUG
3854  // Dispatch the kernel passed as a HLSL source level shader
3855  // This function is to be used only for testing and debugging purposes
3856  _AMPIMP void HELPERAPI __dpc_dispatch_kernel_test(_In_ _DPC_call_handle * _Handle, _In_ WCHAR* szFileName, LPCSTR szEntryPoint) throw(...);
3857 #endif
3858 }
3859 
3860 // =+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+
3861 //
3862 // C++ AMP ETW Provider
3863 //
3864 // =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
3865 
3866 namespace Concurrency
3867 {
3868 namespace details
3869 {
3870 
3871 // Thread-safe factory method for _Amp_runtime_trace object
3873 
3874 // Class that gathers C++ AMP diagnostic information and triggers events
3875  class _Amp_runtime_trace
3876  {
3877 
3878 // Called by factory to create single instance of _Amp_runtime_trace type
3879 friend BOOL CALLBACK _Init_amp_runtime_trace(PINIT_ONCE _Init_once, PVOID _Param, _Inout_ PVOID *_Context);
3880 
3881 public:
3882  // Destructor for _Amp_runtime_trace, called at program termination
3884 
3885  // The end event is triggered by multiple other events, such as StartComputeEvent, to show exactly when a given activity completed
3886  _AMPIMP void _Write_end_event(ULONG _Span_id);
3887 
3888  // Add accelerator configuration information
3889  // Note: This member function does not have to be exported, it is used by C++ AMP runtime factory
3890  void _Add_accelerator_config_event(PVOID _Accelerator_id, LPCWSTR _Device_path, LPCWSTR _Device_description);
3891 
3892  // Used by the callback function to write all configuration data when a new session is detected
3893  // Note: This member function does not have to be exported, it is used by C++ AMP runtime factory
3894  void _Write_all_accelerator_config_events();
3895 
3896  // Started accelerator_view::wait operation
3897  // Note: This member function does not have to be exported, it is used by C++ AMP runtime factory
3898  ULONG _Start_accelerator_view_wait_event(PVOID _Accelerator_id, PVOID _Accelerator_view_id);
3899 
3900  // Launched accelerator_view::flush operation
3901  // Note: This member function does not have to be exported, it is used by C++ AMP runtime factory
3902  void _Launch_flush_event(PVOID _Accelerator_id, PVOID _Accelerator_view_id);
3903 
3904  // Launched accelerator_view::create_marker operation
3905  // Note: This member function does not have to be exported, it is used by C++ AMP runtime factory
3906  ULONG _Launch_marker(PVOID _Accelerator_id, PVOID _Accelerator_view_id);
3907 
3908  // Below is a set of helpers that take the various types available at the event injection point and extract all the necessary data
3909  _AMPIMP ULONG _Start_parallel_for_each_event_helper(_In_ _DPC_call_handle *_Handle);
3910 
3911  // This helper wraps the functor with wait start and wait end events
3913  {
3914  std::shared_future<void> retFuture;
3915  concurrency::task_completion_event<void> retTaskCompletionEvent;
3916 
3917  // Create a std::shared_future by creating a deferred task through std::async that waits for the
3918  // event _Ev to finish. Wrap functor with start and end events
3919  retFuture = std::async(std::launch::sync, [=]() mutable {
3920  try
3921  {
3922  if (_Async_op_id == _Amp_runtime_trace::_M_event_disabled)
3923  {
3924  _Ev._Get();
3925  }
3926  else
3927  {
3928  auto _Span_id = details::_Get_amp_trace()->_Start_async_op_wait_event(_Async_op_id);
3929  _Ev._Get();
3931  }
3932  }
3933  catch(...)
3934  {
3935  // If an exception is encountered when executing the asynchronous operation
3936  // we should set the exception on the retTaskCompletionEvent so that it is
3937  // appropriately cancelled and the exception is propagated to continuations
3938  retTaskCompletionEvent.set_exception(std::current_exception());
3939  throw;
3940  }
3941 
3942  retTaskCompletionEvent.set();
3943  });
3944 
3945  // Register the async event with the runtime asynchronous events manager
3946  _Register_async_event(_Ev, retFuture);
3947 
3948  // Let's issue a continuation just to swallow any exceptions that are encountered during the
3949  // async operation and are never observed by the user or are just observed through the
3950  // shared_future and not through the task
3951  concurrency::task<void> retTask(retTaskCompletionEvent);
3952  retTask.then([](concurrency::task<void> _Task) {
3953  try {
3954  _Task.get();
3955  }
3956  catch(...) {
3957  }
3958  });
3959 
3960  return Concurrency::completion_future(retFuture, retTask);
3961  }
3962 
3963  _AMPIMP ULONG _Start_array_view_synchronize_event_helper(const _Buffer_descriptor &_Buff_desc);
3964  _AMPIMP ULONG _Launch_array_view_synchronize_event_helper(const _Buffer_descriptor &_Buff_desc);
3965 
3966  // Helpers for buffers (array, array_view)
3967  _AMPIMP ULONG _Start_copy_event_helper(const _Buffer_descriptor &_Src, const _Buffer_descriptor &_Dest, ULONGLONG _Num_bytes_for_copy);
3968  _AMPIMP ULONG _Start_copy_event_helper(nullptr_t, const _Buffer_descriptor &_Dest, ULONGLONG _Num_bytes_for_copy);
3969  _AMPIMP ULONG _Start_copy_event_helper(const _Buffer_descriptor &_Src, nullptr_t, ULONGLONG _Num_bytes_for_copy);
3970  _AMPIMP ULONG _Launch_async_copy_event_helper(const _Buffer_descriptor &_Src, const _Buffer_descriptor &_Dest, ULONGLONG _Num_bytes_for_copy);
3971  _AMPIMP ULONG _Launch_async_copy_event_helper(nullptr_t, const _Buffer_descriptor &_Dest, ULONGLONG _Num_bytes_for_copy);
3972  _AMPIMP ULONG _Launch_async_copy_event_helper(const _Buffer_descriptor &_Src, nullptr_t, ULONGLONG _Num_bytes_for_copy);
3973 
3974  // Helper for textures
3975  _AMPIMP ULONG _Start_copy_event_helper(const _Texture_descriptor &_Src, nullptr_t, ULONGLONG _Num_bytes_for_copy);
3976  _AMPIMP ULONG _Start_copy_event_helper(nullptr_t, const _Texture_descriptor &_Dest, ULONGLONG _Num_bytes_for_copy);
3977  _AMPIMP ULONG _Start_copy_event_helper(const _Texture_descriptor &_Src, const _Texture_descriptor &_Dest, ULONGLONG _Num_bytes_for_copy);
3978  _AMPIMP ULONG _Launch_async_copy_event_helper(const _Texture_descriptor &_Src, nullptr_t, ULONGLONG _Num_bytes_for_copy);
3979  _AMPIMP ULONG _Launch_async_copy_event_helper(nullptr_t, const _Texture_descriptor &_Dest, ULONGLONG _Num_bytes_for_copy);
3980  _AMPIMP ULONG _Launch_async_copy_event_helper(const _Texture_descriptor &_Src, const _Texture_descriptor &_Dest, ULONGLONG _Num_bytes_for_copy);
3981 
3982  void _Enable_provider(bool _Enable = true);
3983 
3984 private:
3985  // Private constructor. Instances of this type are created by the factory method
3986  _Amp_runtime_trace(PVOID _Callback_function, _In_ _Trace *_Trace);
3987 
3988  // Disallow copy construction
3990  _Amp_runtime_trace(const _Amp_runtime_trace &_Other);
3991  // Disallow assignment operator
3993  _Amp_runtime_trace &operator=(const _Amp_runtime_trace &_Other);
3994  // Used internally to write configuration events
3995  void _Write_accelerator_config_event(const std::tuple<PVOID, LPCWSTR, LPCWSTR> &_ConfigTuple);
3996 
3997  // Event triggered when computation is scheduled
3998  ULONG _Start_parallel_for_each_event(
3999  PVOID _Accelerator_id,
4000  PVOID _Accelerator_view_id,
4001  BOOL _Is_tiled_explicitly,
4002  ULONGLONG _Num_of_tiles,
4003  ULONG _Num_of_threads_per_tile,
4004  BOOL _Is_aliased,
4005  ULONG _Num_read_only_resources,
4006  ULONG _Num_read_write_resources,
4007  ULONGLONG _Size_of_all_resouces,
4008  ULONG _Size_of_const_data,
4009  ULONGLONG _Size_of_data_for_copy);
4010 
4011  // Synchronous copy operation has started
4012  ULONG _Start_copy_event(
4013  PVOID _Src_accelerator_id,
4014  PVOID _Src_accelerator_view_id,
4015  PVOID _Dst_accelerator_id,
4016  PVOID _Dst_accelerator_view_id,
4017  ULONGLONG _Num_bytes_for_copy,
4018  BOOL _Is_src_staging,
4019  BOOL _Is_dst_staging);
4020 
4021  // Asynchronous copy operation has been launched
4022  ULONG _Launch_async_copy_event(
4023  PVOID _Src_accelerator_id,
4024  PVOID _Src_accelerator_view_id,
4025  PVOID _Dst_accelerator_id,
4026  PVOID _Dst_accelerator_view_id,
4027  ULONGLONG _Num_bytes_for_copy,
4028  BOOL _Is_src_staging,
4029  BOOL _Is_dst_staging);
4030 
4031  // Started waiting for asynchronous operation to complete
4032  _AMPIMP ULONG _Start_async_op_wait_event(ULONG _Async_op_id);
4033 
4034  // Started array_view::synchronize operation
4035  ULONG _Start_array_view_synchronize_event(ULONGLONG _Num_bytes_to_synchronize);
4036 
4037  // Async array_view::synchronize operation has been launched
4038  ULONG _Launch_array_view_synchronize_event(ULONGLONG _Num_bytes_to_synchronize);
4039 
4040  // Helper function that extracts information from buffer descriptor
4041  std::tuple<PVOID, PVOID, BOOL> _Get_resource_diagnostic_info(const _Buffer_descriptor &_Buff_desc, accelerator_view _Accl_view) const;
4042 
4043  // Helper function that extracts information from texture descriptor
4044  std::tuple<PVOID, PVOID, BOOL> _Get_resource_diagnostic_info(const _Texture_descriptor &_Tex_desc) const;
4045 
4046  // Generates unique identifiers for span_id and async_op_id
4047  ULONG _Get_unique_identifier();
4048 
4049  // Critical section object used by the callback function to synchronize the following situations:
4050  // a) multiple sessions have started at the same time
4051  // b) the C++ AMP Runtime factory adds a new accelerator config event to the collection
4052  Concurrency::critical_section _M_critical_section;
4053 
4054  // Collection of all configuration events at the time of C++ AMP Runtime initialization
4055  std::vector<std::tuple<PVOID, LPCWSTR, LPCWSTR>> _M_accelerator_configs;
4056 
4057  // Unique counter for span id and async operation id
4058  volatile ULONG _M_counter;
4059 
4060  // Type that implements ITrace interface and writes events e.g. ETW events
4061  _Trace* _M_trace_ptr;
4062 
4063  // Special value that we return to chain events if provider is disabled
4064  static const ULONG _M_event_disabled = 0;
4065 };
4066 
4067 // Helper function to query the number of mipmap levels from a texture object
4068 inline unsigned int _Get_mipmap_levels(const _Texture *_Tex)
4069 {
4070  _ASSERTE(_Tex);
4071  return _Tex->_Get_mip_levels();
4072 }
4073 
4074 } // namespace Concurrency::details
4075 } // namespace Concurrency
4076 
4077 namespace concurrency = Concurrency;
4078 
4079 #pragma pack(pop)
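
The pattern implemented by _Start_async_op_wait_event_helper above (a blocking wait wrapped in a std::shared_future<void> through a deferred std::async task, its outcome mirrored onto a concurrency::task_completion_event, and a continuation attached purely to observe failures so the task side never reports an unobserved exception) can be illustrated outside the runtime. The sketch below is not part of amprt.h: wrap_blocking_wait and its wait_for_operation parameter are hypothetical stand-ins for the runtime's wait on an _Event, std::launch::deferred is used in place of the launch policy in the header, and the ETW span events are omitted.

#include <exception>
#include <functional>
#include <future>
#include <utility>
#include <ppltasks.h>

std::pair<std::shared_future<void>, concurrency::task<void>>
wrap_blocking_wait(std::function<void()> wait_for_operation)
{
    concurrency::task_completion_event<void> tce;

    // Deferred task: the wait runs when the shared_future is first waited on.
    std::shared_future<void> fut = std::async(std::launch::deferred, [=]()
    {
        try
        {
            wait_for_operation();                    // e.g. block until the async operation finishes
        }
        catch (...)
        {
            tce.set_exception(std::current_exception());
            throw;                                   // also propagate through the shared_future
        }
        tce.set();
    });

    // Continuation whose only purpose is to observe (and swallow) any exception,
    // mirroring the retTask.then(...) continuation in the helper above.
    concurrency::task<void> t(tce);
    t.then([](concurrency::task<void> completed)
    {
        try { completed.get(); } catch (...) {}
    });

    return std::make_pair(fut, t);
}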