diff options
Diffstat (limited to 'include/arch/x86_64/drm')
31 files changed, 28039 insertions, 0 deletions
diff --git a/include/arch/x86_64/drm/amdgpu_drm.h b/include/arch/x86_64/drm/amdgpu_drm.h new file mode 100644 index 00000000..b6f422e8 --- /dev/null +++ b/include/arch/x86_64/drm/amdgpu_drm.h @@ -0,0 +1,1665 @@ +/* amdgpu_drm.h -- Public header for the amdgpu driver -*- linux-c -*- + * + * Copyright 2000 Precision Insight, Inc., Cedar Park, Texas. + * Copyright 2000 VA Linux Systems, Inc., Fremont, California. + * Copyright 2002 Tungsten Graphics, Inc., Cedar Park, Texas. + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Kevin E. Martin <martin@valinux.com> + * Gareth Hughes <gareth@valinux.com> + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#ifndef __AMDGPU_DRM_H__ +#define __AMDGPU_DRM_H__ + +#include "drm.h" + +#if defined(__cplusplus) +extern "C" { +#endif + +#define DRM_AMDGPU_GEM_CREATE 0x00 +#define DRM_AMDGPU_GEM_MMAP 0x01 +#define DRM_AMDGPU_CTX 0x02 +#define DRM_AMDGPU_BO_LIST 0x03 +#define DRM_AMDGPU_CS 0x04 +#define DRM_AMDGPU_INFO 0x05 +#define DRM_AMDGPU_GEM_METADATA 0x06 +#define DRM_AMDGPU_GEM_WAIT_IDLE 0x07 +#define DRM_AMDGPU_GEM_VA 0x08 +#define DRM_AMDGPU_WAIT_CS 0x09 +#define DRM_AMDGPU_GEM_OP 0x10 +#define DRM_AMDGPU_GEM_USERPTR 0x11 +#define DRM_AMDGPU_WAIT_FENCES 0x12 +#define DRM_AMDGPU_VM 0x13 +#define DRM_AMDGPU_FENCE_TO_HANDLE 0x14 +#define DRM_AMDGPU_SCHED 0x15 +#define DRM_AMDGPU_USERQ 0x16 +#define DRM_AMDGPU_USERQ_SIGNAL 0x17 +#define DRM_AMDGPU_USERQ_WAIT 0x18 + +#define DRM_IOCTL_AMDGPU_GEM_CREATE \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, \ + union drm_amdgpu_gem_create) +#define DRM_IOCTL_AMDGPU_GEM_MMAP \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, \ + union drm_amdgpu_gem_mmap) +#define DRM_IOCTL_AMDGPU_CTX \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_CTX, union drm_amdgpu_ctx) +#define DRM_IOCTL_AMDGPU_BO_LIST \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_BO_LIST, \ + union drm_amdgpu_bo_list) +#define DRM_IOCTL_AMDGPU_CS \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_CS, union drm_amdgpu_cs) +#define DRM_IOCTL_AMDGPU_INFO \ + DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_INFO, struct drm_amdgpu_info) +#define DRM_IOCTL_AMDGPU_GEM_METADATA \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_METADATA, \ + struct drm_amdgpu_gem_metadata) +#define DRM_IOCTL_AMDGPU_GEM_WAIT_IDLE \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_WAIT_IDLE, \ + union drm_amdgpu_gem_wait_idle) +#define DRM_IOCTL_AMDGPU_GEM_VA \ + DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_VA, struct drm_amdgpu_gem_va) +#define DRM_IOCTL_AMDGPU_WAIT_CS \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_WAIT_CS, \ + union drm_amdgpu_wait_cs) +#define DRM_IOCTL_AMDGPU_GEM_OP \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_OP, struct drm_amdgpu_gem_op) +#define DRM_IOCTL_AMDGPU_GEM_USERPTR \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_USERPTR, \ + struct drm_amdgpu_gem_userptr) +#define DRM_IOCTL_AMDGPU_WAIT_FENCES \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_WAIT_FENCES, \ + union drm_amdgpu_wait_fences) +#define DRM_IOCTL_AMDGPU_VM \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_VM, union drm_amdgpu_vm) +#define DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_FENCE_TO_HANDLE, \ + union drm_amdgpu_fence_to_handle) +#define DRM_IOCTL_AMDGPU_SCHED \ + DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_SCHED, union drm_amdgpu_sched) +#define DRM_IOCTL_AMDGPU_USERQ \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ, union drm_amdgpu_userq) +#define DRM_IOCTL_AMDGPU_USERQ_SIGNAL \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_SIGNAL, \ + struct drm_amdgpu_userq_signal) +#define DRM_IOCTL_AMDGPU_USERQ_WAIT \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_WAIT, \ + struct drm_amdgpu_userq_wait) + +/** + * DOC: memory domains + * + * %AMDGPU_GEM_DOMAIN_CPU System memory that is not GPU accessible. + * Memory in this pool could be swapped out to disk if there is pressure. + * + * %AMDGPU_GEM_DOMAIN_GTT GPU accessible system memory, mapped into the + * GPU's virtual address space via gart. Gart memory linearizes non-contiguous + * pages of system memory, allows GPU access system memory in a linearized + * fashion. + * + * %AMDGPU_GEM_DOMAIN_VRAM Local video memory. For APUs, it is memory + * carved out by the BIOS. + * + * %AMDGPU_GEM_DOMAIN_GDS Global on-chip data storage used to share data + * across shader threads. + * + * %AMDGPU_GEM_DOMAIN_GWS Global wave sync, used to synchronize the + * execution of all the waves on a device. + * + * %AMDGPU_GEM_DOMAIN_OA Ordered append, used by 3D or Compute engines + * for appending data. + * + * %AMDGPU_GEM_DOMAIN_DOORBELL Doorbell. It is an MMIO region for + * signalling user mode queues. + */ +#define AMDGPU_GEM_DOMAIN_CPU 0x1 +#define AMDGPU_GEM_DOMAIN_GTT 0x2 +#define AMDGPU_GEM_DOMAIN_VRAM 0x4 +#define AMDGPU_GEM_DOMAIN_GDS 0x8 +#define AMDGPU_GEM_DOMAIN_GWS 0x10 +#define AMDGPU_GEM_DOMAIN_OA 0x20 +#define AMDGPU_GEM_DOMAIN_DOORBELL 0x40 +#define AMDGPU_GEM_DOMAIN_MASK \ + (AMDGPU_GEM_DOMAIN_CPU | AMDGPU_GEM_DOMAIN_GTT | \ + AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GDS | \ + AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA | \ + AMDGPU_GEM_DOMAIN_DOORBELL) + +/* Flag that CPU access will be required for the case of VRAM domain */ +#define AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED (1 << 0) +/* Flag that CPU access will not work, this VRAM domain is invisible */ +#define AMDGPU_GEM_CREATE_NO_CPU_ACCESS (1 << 1) +/* Flag that USWC attributes should be used for GTT */ +#define AMDGPU_GEM_CREATE_CPU_GTT_USWC (1 << 2) +/* Flag that the memory should be in VRAM and cleared */ +#define AMDGPU_GEM_CREATE_VRAM_CLEARED (1 << 3) +/* Flag that allocating the BO should use linear VRAM */ +#define AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS (1 << 5) +/* Flag that BO is always valid in this VM */ +#define AMDGPU_GEM_CREATE_VM_ALWAYS_VALID (1 << 6) +/* Flag that BO sharing will be explicitly synchronized */ +#define AMDGPU_GEM_CREATE_EXPLICIT_SYNC (1 << 7) +/* Flag that indicates allocating MQD gart on GFX9, where the mtype + * for the second page onward should be set to NC. It should never + * be used by user space applications. + */ +#define AMDGPU_GEM_CREATE_CP_MQD_GFX9 (1 << 8) +/* Flag that BO may contain sensitive data that must be wiped before + * releasing the memory + */ +#define AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE (1 << 9) +/* Flag that BO will be encrypted and that the TMZ bit should be + * set in the PTEs when mapping this buffer via GPUVM or + * accessing it with various hw blocks + */ +#define AMDGPU_GEM_CREATE_ENCRYPTED (1 << 10) +/* Flag that BO will be used only in preemptible context, which does + * not require GTT memory accounting + */ +#define AMDGPU_GEM_CREATE_PREEMPTIBLE (1 << 11) +/* Flag that BO can be discarded under memory pressure without keeping the + * content. + */ +#define AMDGPU_GEM_CREATE_DISCARDABLE (1 << 12) +/* Flag that BO is shared coherently between multiple devices or CPU threads. + * May depend on GPU instructions to flush caches to system scope explicitly. + * + * This influences the choice of MTYPE in the PTEs on GFXv9 and later GPUs and + * may override the MTYPE selected in AMDGPU_VA_OP_MAP. + */ +#define AMDGPU_GEM_CREATE_COHERENT (1 << 13) +/* Flag that BO should not be cached by GPU. Coherent without having to flush + * GPU caches explicitly + * + * This influences the choice of MTYPE in the PTEs on GFXv9 and later GPUs and + * may override the MTYPE selected in AMDGPU_VA_OP_MAP. + */ +#define AMDGPU_GEM_CREATE_UNCACHED (1 << 14) +/* Flag that BO should be coherent across devices when using device-level + * atomics. May depend on GPU instructions to flush caches to device scope + * explicitly, promoting them to system scope automatically. + * + * This influences the choice of MTYPE in the PTEs on GFXv9 and later GPUs and + * may override the MTYPE selected in AMDGPU_VA_OP_MAP. + */ +#define AMDGPU_GEM_CREATE_EXT_COHERENT (1 << 15) +/* Set PTE.D and recompress during GTT->VRAM moves according to TILING flags. */ +#define AMDGPU_GEM_CREATE_GFX12_DCC (1 << 16) + +struct drm_amdgpu_gem_create_in { + /** the requested memory size */ + __u64 bo_size; + /** physical start_addr alignment in bytes for some HW requirements */ + __u64 alignment; + /** the requested memory domains */ + __u64 domains; + /** allocation flags */ + __u64 domain_flags; +}; + +struct drm_amdgpu_gem_create_out { + /** returned GEM object handle */ + __u32 handle; + __u32 _pad; +}; + +union drm_amdgpu_gem_create { + struct drm_amdgpu_gem_create_in in; + struct drm_amdgpu_gem_create_out out; +}; + +/** Opcode to create new residency list. */ +#define AMDGPU_BO_LIST_OP_CREATE 0 +/** Opcode to destroy previously created residency list */ +#define AMDGPU_BO_LIST_OP_DESTROY 1 +/** Opcode to update resource information in the list */ +#define AMDGPU_BO_LIST_OP_UPDATE 2 + +struct drm_amdgpu_bo_list_in { + /** Type of operation */ + __u32 operation; + /** Handle of list or 0 if we want to create one */ + __u32 list_handle; + /** Number of BOs in list */ + __u32 bo_number; + /** Size of each element describing BO */ + __u32 bo_info_size; + /** Pointer to array describing BOs */ + __u64 bo_info_ptr; +}; + +struct drm_amdgpu_bo_list_entry { + /** Handle of BO */ + __u32 bo_handle; + /** New (if specified) BO priority to be used during migration */ + __u32 bo_priority; +}; + +struct drm_amdgpu_bo_list_out { + /** Handle of resource list */ + __u32 list_handle; + __u32 _pad; +}; + +union drm_amdgpu_bo_list { + struct drm_amdgpu_bo_list_in in; + struct drm_amdgpu_bo_list_out out; +}; + +/* context related */ +#define AMDGPU_CTX_OP_ALLOC_CTX 1 +#define AMDGPU_CTX_OP_FREE_CTX 2 +#define AMDGPU_CTX_OP_QUERY_STATE 3 +#define AMDGPU_CTX_OP_QUERY_STATE2 4 +#define AMDGPU_CTX_OP_GET_STABLE_PSTATE 5 +#define AMDGPU_CTX_OP_SET_STABLE_PSTATE 6 + +/* GPU reset status */ +#define AMDGPU_CTX_NO_RESET 0 +/* this the context caused it */ +#define AMDGPU_CTX_GUILTY_RESET 1 +/* some other context caused it */ +#define AMDGPU_CTX_INNOCENT_RESET 2 +/* unknown cause */ +#define AMDGPU_CTX_UNKNOWN_RESET 3 + +/* indicate gpu reset occurred after ctx created */ +#define AMDGPU_CTX_QUERY2_FLAGS_RESET (1 << 0) +/* indicate vram lost occurred after ctx created */ +#define AMDGPU_CTX_QUERY2_FLAGS_VRAMLOST (1 << 1) +/* indicate some job from this context once cause gpu hang */ +#define AMDGPU_CTX_QUERY2_FLAGS_GUILTY (1 << 2) +/* indicate some errors are detected by RAS */ +#define AMDGPU_CTX_QUERY2_FLAGS_RAS_CE (1 << 3) +#define AMDGPU_CTX_QUERY2_FLAGS_RAS_UE (1 << 4) +/* indicate that the reset hasn't completed yet */ +#define AMDGPU_CTX_QUERY2_FLAGS_RESET_IN_PROGRESS (1 << 5) + +/* Context priority level */ +#define AMDGPU_CTX_PRIORITY_UNSET -2048 +#define AMDGPU_CTX_PRIORITY_VERY_LOW -1023 +#define AMDGPU_CTX_PRIORITY_LOW -512 +#define AMDGPU_CTX_PRIORITY_NORMAL 0 +/* + * When used in struct drm_amdgpu_ctx_in, a priority above NORMAL requires + * CAP_SYS_NICE or DRM_MASTER + */ +#define AMDGPU_CTX_PRIORITY_HIGH 512 +#define AMDGPU_CTX_PRIORITY_VERY_HIGH 1023 + +/* select a stable profiling pstate for perfmon tools */ +#define AMDGPU_CTX_STABLE_PSTATE_FLAGS_MASK 0xf +#define AMDGPU_CTX_STABLE_PSTATE_NONE 0 +#define AMDGPU_CTX_STABLE_PSTATE_STANDARD 1 +#define AMDGPU_CTX_STABLE_PSTATE_MIN_SCLK 2 +#define AMDGPU_CTX_STABLE_PSTATE_MIN_MCLK 3 +#define AMDGPU_CTX_STABLE_PSTATE_PEAK 4 + +struct drm_amdgpu_ctx_in { + /** AMDGPU_CTX_OP_* */ + __u32 op; + /** Flags */ + __u32 flags; + __u32 ctx_id; + /** AMDGPU_CTX_PRIORITY_* */ + __s32 priority; +}; + +union drm_amdgpu_ctx_out { + struct { + __u32 ctx_id; + __u32 _pad; + } alloc; + + struct { + /** For future use, no flags defined so far */ + __u64 flags; + /** Number of resets caused by this context so far. */ + __u32 hangs; + /** Reset status since the last call of the ioctl. */ + __u32 reset_status; + } state; + + struct { + __u32 flags; + __u32 _pad; + } pstate; +}; + +union drm_amdgpu_ctx { + struct drm_amdgpu_ctx_in in; + union drm_amdgpu_ctx_out out; +}; + +/* user queue IOCTL operations */ +#define AMDGPU_USERQ_OP_CREATE 1 +#define AMDGPU_USERQ_OP_FREE 2 + +/* queue priority levels */ +/* low < normal low < normal high < high */ +#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_MASK 0x3 +#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_SHIFT 0 +#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_NORMAL_LOW 0 +#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_LOW 1 +#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_NORMAL_HIGH 2 +#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_HIGH 3 /* admin only */ +/* for queues that need access to protected content */ +#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE (1 << 2) + +/* + * This structure is a container to pass input configuration + * info for all supported userqueue related operations. + * For operation AMDGPU_USERQ_OP_CREATE: user is expected + * to set all fields, excep the parameter 'queue_id'. + * For operation AMDGPU_USERQ_OP_FREE: the only input parameter expected + * to be set is 'queue_id', eveything else is ignored. + */ +struct drm_amdgpu_userq_in { + /** AMDGPU_USERQ_OP_* */ + __u32 op; + /** Queue id passed for operation USERQ_OP_FREE */ + __u32 queue_id; + /** the target GPU engine to execute workload (AMDGPU_HW_IP_*) */ + __u32 ip_type; + /** + * @doorbell_handle: the handle of doorbell GEM object + * associated with this userqueue client. + */ + __u32 doorbell_handle; + /** + * @doorbell_offset: 32-bit offset of the doorbell in the doorbell bo. + * Kernel will generate absolute doorbell offset using doorbell_handle + * and doorbell_offset in the doorbell bo. + */ + __u32 doorbell_offset; + /** + * @flags: flags used for queue parameters + */ + __u32 flags; + /** + * @queue_va: Virtual address of the GPU memory which holds the queue + * object. The queue holds the workload packets. + */ + __u64 queue_va; + /** + * @queue_size: Size of the queue in bytes, this needs to be 256-byte + * aligned. + */ + __u64 queue_size; + /** + * @rptr_va : Virtual address of the GPU memory which holds the ring + * RPTR. This object must be at least 8 byte in size and aligned to + * 8-byte offset. + */ + __u64 rptr_va; + /** + * @wptr_va : Virtual address of the GPU memory which holds the ring + * WPTR. This object must be at least 8 byte in size and aligned to + * 8-byte offset. + * + * Queue, RPTR and WPTR can come from the same object, as long as the + * size and alignment related requirements are met. + */ + __u64 wptr_va; + /** + * @mqd: MQD (memory queue descriptor) is a set of parameters which + * allow the GPU to uniquely define and identify a usermode queue. + * + * MQD data can be of different size for different GPU IP/engine and + * their respective versions/revisions, so this points to a __u64 * + * which holds IP specific MQD of this usermode queue. + */ + __u64 mqd; + /** + * @size: size of MQD data in bytes, it must match the MQD structure + * size of the respective engine/revision defined in UAPI for ex, for + * gfx11 workloads, size = sizeof(drm_amdgpu_userq_mqd_gfx11). + */ + __u64 mqd_size; +}; + +/* The structure to carry output of userqueue ops */ +struct drm_amdgpu_userq_out { + /** + * For operation AMDGPU_USERQ_OP_CREATE: This field contains a unique + * queue ID to represent the newly created userqueue in the system, + * otherwise it should be ignored. + */ + __u32 queue_id; + __u32 _pad; +}; + +union drm_amdgpu_userq { + struct drm_amdgpu_userq_in in; + struct drm_amdgpu_userq_out out; +}; + +/* GFX V11 IP specific MQD parameters */ +struct drm_amdgpu_userq_mqd_gfx11 { + /** + * @shadow_va: Virtual address of the GPU memory to hold the shadow + * buffer. Use AMDGPU_INFO_IOCTL to find the exact size of the object. + */ + __u64 shadow_va; + /** + * @csa_va: Virtual address of the GPU memory to hold the CSA buffer. + * Use AMDGPU_INFO_IOCTL to find the exact size of the object. + */ + __u64 csa_va; +}; + +/* GFX V11 SDMA IP specific MQD parameters */ +struct drm_amdgpu_userq_mqd_sdma_gfx11 { + /** + * @csa_va: Virtual address of the GPU memory to hold the CSA buffer. + * This must be a from a separate GPU object, and use AMDGPU_INFO IOCTL + * to get the size. + */ + __u64 csa_va; +}; + +/* GFX V11 Compute IP specific MQD parameters */ +struct drm_amdgpu_userq_mqd_compute_gfx11 { + /** + * @eop_va: Virtual address of the GPU memory to hold the EOP buffer. + * This must be a from a separate GPU object, and use AMDGPU_INFO IOCTL + * to get the size. + */ + __u64 eop_va; +}; + +/* userq signal/wait ioctl */ +struct drm_amdgpu_userq_signal { + /** + * @queue_id: Queue handle used by the userq fence creation function + * to retrieve the WPTR. + */ + __u32 queue_id; + __u32 pad; + /** + * @syncobj_handles: The list of syncobj handles submitted by the user + * queue job to be signaled. + */ + __u64 syncobj_handles; + /** + * @num_syncobj_handles: A count that represents the number of syncobj + * handles in + * @syncobj_handles. + */ + __u64 num_syncobj_handles; + /** + * @bo_read_handles: The list of BO handles that the submitted user + * queue job is using for read only. This will update BO fences in the + * kernel. + */ + __u64 bo_read_handles; + /** + * @bo_write_handles: The list of BO handles that the submitted user + * queue job is using for write only. This will update BO fences in the + * kernel. + */ + __u64 bo_write_handles; + /** + * @num_bo_read_handles: A count that represents the number of read BO + * handles in + * @bo_read_handles. + */ + __u32 num_bo_read_handles; + /** + * @num_bo_write_handles: A count that represents the number of write BO + * handles in + * @bo_write_handles. + */ + __u32 num_bo_write_handles; +}; + +struct drm_amdgpu_userq_fence_info { + /** + * @va: A gpu address allocated for each queue which stores the + * read pointer (RPTR) value. + */ + __u64 va; + /** + * @value: A 64 bit value represents the write pointer (WPTR) of the + * queue commands which compared with the RPTR value to signal the + * fences. + */ + __u64 value; +}; + +struct drm_amdgpu_userq_wait { + /** + * @waitq_id: Queue handle used by the userq wait IOCTL to retrieve the + * wait queue and maintain the fence driver references in it. + */ + __u32 waitq_id; + __u32 pad; + /** + * @syncobj_handles: The list of syncobj handles submitted by the user + * queue job to get the va/value pairs. + */ + __u64 syncobj_handles; + /** + * @syncobj_timeline_handles: The list of timeline syncobj handles + * submitted by the user queue job to get the va/value pairs at given + * @syncobj_timeline_points. + */ + __u64 syncobj_timeline_handles; + /** + * @syncobj_timeline_points: The list of timeline syncobj points + * submitted by the user queue job for the corresponding + * @syncobj_timeline_handles. + */ + __u64 syncobj_timeline_points; + /** + * @bo_read_handles: The list of read BO handles submitted by the user + * queue job to get the va/value pairs. + */ + __u64 bo_read_handles; + /** + * @bo_write_handles: The list of write BO handles submitted by the user + * queue job to get the va/value pairs. + */ + __u64 bo_write_handles; + /** + * @num_syncobj_timeline_handles: A count that represents the number of + * timeline syncobj handles in @syncobj_timeline_handles. + */ + __u16 num_syncobj_timeline_handles; + /** + * @num_fences: This field can be used both as input and output. As + * input it defines the maximum number of fences that can be returned + * and as output it will specify how many fences were actually returned + * from the ioctl. + */ + __u16 num_fences; + /** + * @num_syncobj_handles: A count that represents the number of syncobj + * handles in + * @syncobj_handles. + */ + __u32 num_syncobj_handles; + /** + * @num_bo_read_handles: A count that represents the number of read BO + * handles in + * @bo_read_handles. + */ + __u32 num_bo_read_handles; + /** + * @num_bo_write_handles: A count that represents the number of write BO + * handles in + * @bo_write_handles. + */ + __u32 num_bo_write_handles; + /** + * @out_fences: The field is a return value from the ioctl containing + * the list of address/value pairs to wait for. + */ + __u64 out_fences; +}; + +/* vm ioctl */ +#define AMDGPU_VM_OP_RESERVE_VMID 1 +#define AMDGPU_VM_OP_UNRESERVE_VMID 2 + +struct drm_amdgpu_vm_in { + /** AMDGPU_VM_OP_* */ + __u32 op; + __u32 flags; +}; + +struct drm_amdgpu_vm_out { + /** For future use, no flags defined so far */ + __u64 flags; +}; + +union drm_amdgpu_vm { + struct drm_amdgpu_vm_in in; + struct drm_amdgpu_vm_out out; +}; + +/* sched ioctl */ +#define AMDGPU_SCHED_OP_PROCESS_PRIORITY_OVERRIDE 1 +#define AMDGPU_SCHED_OP_CONTEXT_PRIORITY_OVERRIDE 2 + +struct drm_amdgpu_sched_in { + /* AMDGPU_SCHED_OP_* */ + __u32 op; + __u32 fd; + /** AMDGPU_CTX_PRIORITY_* */ + __s32 priority; + __u32 ctx_id; +}; + +union drm_amdgpu_sched { + struct drm_amdgpu_sched_in in; +}; + +/* + * This is not a reliable API and you should expect it to fail for any + * number of reasons and have fallback path that do not use userptr to + * perform any operation. + */ +#define AMDGPU_GEM_USERPTR_READONLY (1 << 0) +#define AMDGPU_GEM_USERPTR_ANONONLY (1 << 1) +#define AMDGPU_GEM_USERPTR_VALIDATE (1 << 2) +#define AMDGPU_GEM_USERPTR_REGISTER (1 << 3) + +struct drm_amdgpu_gem_userptr { + __u64 addr; + __u64 size; + /* AMDGPU_GEM_USERPTR_* */ + __u32 flags; + /* Resulting GEM handle */ + __u32 handle; +}; + +/* SI-CI-VI: */ +/* same meaning as the GB_TILE_MODE and GL_MACRO_TILE_MODE fields */ +#define AMDGPU_TILING_ARRAY_MODE_SHIFT 0 +#define AMDGPU_TILING_ARRAY_MODE_MASK 0xf +#define AMDGPU_TILING_PIPE_CONFIG_SHIFT 4 +#define AMDGPU_TILING_PIPE_CONFIG_MASK 0x1f +#define AMDGPU_TILING_TILE_SPLIT_SHIFT 9 +#define AMDGPU_TILING_TILE_SPLIT_MASK 0x7 +#define AMDGPU_TILING_MICRO_TILE_MODE_SHIFT 12 +#define AMDGPU_TILING_MICRO_TILE_MODE_MASK 0x7 +#define AMDGPU_TILING_BANK_WIDTH_SHIFT 15 +#define AMDGPU_TILING_BANK_WIDTH_MASK 0x3 +#define AMDGPU_TILING_BANK_HEIGHT_SHIFT 17 +#define AMDGPU_TILING_BANK_HEIGHT_MASK 0x3 +#define AMDGPU_TILING_MACRO_TILE_ASPECT_SHIFT 19 +#define AMDGPU_TILING_MACRO_TILE_ASPECT_MASK 0x3 +#define AMDGPU_TILING_NUM_BANKS_SHIFT 21 +#define AMDGPU_TILING_NUM_BANKS_MASK 0x3 + +/* GFX9 - GFX11: */ +#define AMDGPU_TILING_SWIZZLE_MODE_SHIFT 0 +#define AMDGPU_TILING_SWIZZLE_MODE_MASK 0x1f +#define AMDGPU_TILING_DCC_OFFSET_256B_SHIFT 5 +#define AMDGPU_TILING_DCC_OFFSET_256B_MASK 0xFFFFFF +#define AMDGPU_TILING_DCC_PITCH_MAX_SHIFT 29 +#define AMDGPU_TILING_DCC_PITCH_MAX_MASK 0x3FFF +#define AMDGPU_TILING_DCC_INDEPENDENT_64B_SHIFT 43 +#define AMDGPU_TILING_DCC_INDEPENDENT_64B_MASK 0x1 +#define AMDGPU_TILING_DCC_INDEPENDENT_128B_SHIFT 44 +#define AMDGPU_TILING_DCC_INDEPENDENT_128B_MASK 0x1 +#define AMDGPU_TILING_SCANOUT_SHIFT 63 +#define AMDGPU_TILING_SCANOUT_MASK 0x1 + +/* GFX12 and later: */ +#define AMDGPU_TILING_GFX12_SWIZZLE_MODE_SHIFT 0 +#define AMDGPU_TILING_GFX12_SWIZZLE_MODE_MASK 0x7 +/* These are DCC recompression settings for memory management: */ +#define AMDGPU_TILING_GFX12_DCC_MAX_COMPRESSED_BLOCK_SHIFT 3 +#define AMDGPU_TILING_GFX12_DCC_MAX_COMPRESSED_BLOCK_MASK \ + 0x3 /* 0:64B, 1:128B, 2:256B */ +#define AMDGPU_TILING_GFX12_DCC_NUMBER_TYPE_SHIFT 5 +#define AMDGPU_TILING_GFX12_DCC_NUMBER_TYPE_MASK \ + 0x7 /* CB_COLOR0_INFO.NUMBER_TYPE */ +#define AMDGPU_TILING_GFX12_DCC_DATA_FORMAT_SHIFT 8 +#define AMDGPU_TILING_GFX12_DCC_DATA_FORMAT_MASK \ + 0x3f /* [0:4]:CB_COLOR0_INFO.FORMAT, [5]:MM */ +/* When clearing the buffer or moving it from VRAM to GTT, don't compress and + * set DCC metadata to uncompressed. Set when parts of an allocation bypass DCC + * and read raw data. */ +#define AMDGPU_TILING_GFX12_DCC_WRITE_COMPRESS_DISABLE_SHIFT 14 +#define AMDGPU_TILING_GFX12_DCC_WRITE_COMPRESS_DISABLE_MASK 0x1 +/* bit gap */ +#define AMDGPU_TILING_GFX12_SCANOUT_SHIFT 63 +#define AMDGPU_TILING_GFX12_SCANOUT_MASK 0x1 + +/* Set/Get helpers for tiling flags. */ +#define AMDGPU_TILING_SET(field, value) \ + (((__u64)(value) & AMDGPU_TILING_##field##_MASK) \ + << AMDGPU_TILING_##field##_SHIFT) +#define AMDGPU_TILING_GET(value, field) \ + (((__u64)(value) >> AMDGPU_TILING_##field##_SHIFT) & \ + AMDGPU_TILING_##field##_MASK) + +#define AMDGPU_GEM_METADATA_OP_SET_METADATA 1 +#define AMDGPU_GEM_METADATA_OP_GET_METADATA 2 + +/** The same structure is shared for input/output */ +struct drm_amdgpu_gem_metadata { + /** GEM Object handle */ + __u32 handle; + /** Do we want get or set metadata */ + __u32 op; + struct { + /** For future use, no flags defined so far */ + __u64 flags; + /** family specific tiling info */ + __u64 tiling_info; + __u32 data_size_bytes; + __u32 data[64]; + } data; +}; + +struct drm_amdgpu_gem_mmap_in { + /** the GEM object handle */ + __u32 handle; + __u32 _pad; +}; + +struct drm_amdgpu_gem_mmap_out { + /** mmap offset from the vma offset manager */ + __u64 addr_ptr; +}; + +union drm_amdgpu_gem_mmap { + struct drm_amdgpu_gem_mmap_in in; + struct drm_amdgpu_gem_mmap_out out; +}; + +struct drm_amdgpu_gem_wait_idle_in { + /** GEM object handle */ + __u32 handle; + /** For future use, no flags defined so far */ + __u32 flags; + /** Absolute timeout to wait */ + __u64 timeout; +}; + +struct drm_amdgpu_gem_wait_idle_out { + /** BO status: 0 - BO is idle, 1 - BO is busy */ + __u32 status; + /** Returned current memory domain */ + __u32 domain; +}; + +union drm_amdgpu_gem_wait_idle { + struct drm_amdgpu_gem_wait_idle_in in; + struct drm_amdgpu_gem_wait_idle_out out; +}; + +struct drm_amdgpu_wait_cs_in { + /* Command submission handle + * handle equals 0 means none to wait for + * handle equals ~0ull means wait for the latest sequence number + */ + __u64 handle; + /** Absolute timeout to wait */ + __u64 timeout; + __u32 ip_type; + __u32 ip_instance; + __u32 ring; + __u32 ctx_id; +}; + +struct drm_amdgpu_wait_cs_out { + /** CS status: 0 - CS completed, 1 - CS still busy */ + __u64 status; +}; + +union drm_amdgpu_wait_cs { + struct drm_amdgpu_wait_cs_in in; + struct drm_amdgpu_wait_cs_out out; +}; + +struct drm_amdgpu_fence { + __u32 ctx_id; + __u32 ip_type; + __u32 ip_instance; + __u32 ring; + __u64 seq_no; +}; + +struct drm_amdgpu_wait_fences_in { + /** This points to uint64_t * which points to fences */ + __u64 fences; + __u32 fence_count; + __u32 wait_all; + __u64 timeout_ns; +}; + +struct drm_amdgpu_wait_fences_out { + __u32 status; + __u32 first_signaled; +}; + +union drm_amdgpu_wait_fences { + struct drm_amdgpu_wait_fences_in in; + struct drm_amdgpu_wait_fences_out out; +}; + +#define AMDGPU_GEM_OP_GET_GEM_CREATE_INFO 0 +#define AMDGPU_GEM_OP_SET_PLACEMENT 1 + +/* Sets or returns a value associated with a buffer. */ +struct drm_amdgpu_gem_op { + /** GEM object handle */ + __u32 handle; + /** AMDGPU_GEM_OP_* */ + __u32 op; + /** Input or return value */ + __u64 value; +}; + +#define AMDGPU_VA_OP_MAP 1 +#define AMDGPU_VA_OP_UNMAP 2 +#define AMDGPU_VA_OP_CLEAR 3 +#define AMDGPU_VA_OP_REPLACE 4 + +/* Delay the page table update till the next CS */ +#define AMDGPU_VM_DELAY_UPDATE (1 << 0) + +/* Mapping flags */ +/* readable mapping */ +#define AMDGPU_VM_PAGE_READABLE (1 << 1) +/* writable mapping */ +#define AMDGPU_VM_PAGE_WRITEABLE (1 << 2) +/* executable mapping, new for VI */ +#define AMDGPU_VM_PAGE_EXECUTABLE (1 << 3) +/* partially resident texture */ +#define AMDGPU_VM_PAGE_PRT (1 << 4) +/* MTYPE flags use bit 5 to 8 */ +#define AMDGPU_VM_MTYPE_MASK (0xf << 5) +/* Default MTYPE. Pre-AI must use this. Recommended for newer ASICs. */ +#define AMDGPU_VM_MTYPE_DEFAULT (0 << 5) +/* Use Non Coherent MTYPE instead of default MTYPE */ +#define AMDGPU_VM_MTYPE_NC (1 << 5) +/* Use Write Combine MTYPE instead of default MTYPE */ +#define AMDGPU_VM_MTYPE_WC (2 << 5) +/* Use Cache Coherent MTYPE instead of default MTYPE */ +#define AMDGPU_VM_MTYPE_CC (3 << 5) +/* Use UnCached MTYPE instead of default MTYPE */ +#define AMDGPU_VM_MTYPE_UC (4 << 5) +/* Use Read Write MTYPE instead of default MTYPE */ +#define AMDGPU_VM_MTYPE_RW (5 << 5) +/* don't allocate MALL */ +#define AMDGPU_VM_PAGE_NOALLOC (1 << 9) + +struct drm_amdgpu_gem_va { + /** GEM object handle */ + __u32 handle; + __u32 _pad; + /** AMDGPU_VA_OP_* */ + __u32 operation; + /** AMDGPU_VM_PAGE_* */ + __u32 flags; + /** va address to assign . Must be correctly aligned.*/ + __u64 va_address; + /** Specify offset inside of BO to assign. Must be correctly aligned.*/ + __u64 offset_in_bo; + /** Specify mapping size. Must be correctly aligned. */ + __u64 map_size; + /** + * vm_timeline_point is a sequence number used to add new timeline + * point. + */ + __u64 vm_timeline_point; + /** + * The vm page table update fence is installed in given + * vm_timeline_syncobj_out at vm_timeline_point. + */ + __u32 vm_timeline_syncobj_out; + /** the number of syncobj handles in @input_fence_syncobj_handles */ + __u32 num_syncobj_handles; + /** Array of sync object handle to wait for given input fences */ + __u64 input_fence_syncobj_handles; +}; + +#define AMDGPU_HW_IP_GFX 0 +#define AMDGPU_HW_IP_COMPUTE 1 +#define AMDGPU_HW_IP_DMA 2 +#define AMDGPU_HW_IP_UVD 3 +#define AMDGPU_HW_IP_VCE 4 +#define AMDGPU_HW_IP_UVD_ENC 5 +#define AMDGPU_HW_IP_VCN_DEC 6 +/* + * From VCN4, AMDGPU_HW_IP_VCN_ENC is re-used to support + * both encoding and decoding jobs. + */ +#define AMDGPU_HW_IP_VCN_ENC 7 +#define AMDGPU_HW_IP_VCN_JPEG 8 +#define AMDGPU_HW_IP_VPE 9 +#define AMDGPU_HW_IP_NUM 10 + +#define AMDGPU_HW_IP_INSTANCE_MAX_COUNT 1 + +#define AMDGPU_CHUNK_ID_IB 0x01 +#define AMDGPU_CHUNK_ID_FENCE 0x02 +#define AMDGPU_CHUNK_ID_DEPENDENCIES 0x03 +#define AMDGPU_CHUNK_ID_SYNCOBJ_IN 0x04 +#define AMDGPU_CHUNK_ID_SYNCOBJ_OUT 0x05 +#define AMDGPU_CHUNK_ID_BO_HANDLES 0x06 +#define AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES 0x07 +#define AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT 0x08 +#define AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL 0x09 +#define AMDGPU_CHUNK_ID_CP_GFX_SHADOW 0x0a + +struct drm_amdgpu_cs_chunk { + __u32 chunk_id; + __u32 length_dw; + __u64 chunk_data; +}; + +struct drm_amdgpu_cs_in { + /** Rendering context id */ + __u32 ctx_id; + /** Handle of resource list associated with CS */ + __u32 bo_list_handle; + __u32 num_chunks; + __u32 flags; + /** this points to __u64 * which point to cs chunks */ + __u64 chunks; +}; + +struct drm_amdgpu_cs_out { + __u64 handle; +}; + +union drm_amdgpu_cs { + struct drm_amdgpu_cs_in in; + struct drm_amdgpu_cs_out out; +}; + +/* Specify flags to be used for IB */ + +/* This IB should be submitted to CE */ +#define AMDGPU_IB_FLAG_CE (1 << 0) + +/* Preamble flag, which means the IB could be dropped if no context switch */ +#define AMDGPU_IB_FLAG_PREAMBLE (1 << 1) + +/* Preempt flag, IB should set Pre_enb bit if PREEMPT flag detected */ +#define AMDGPU_IB_FLAG_PREEMPT (1 << 2) + +/* The IB fence should do the L2 writeback but not invalidate any shader + * caches (L2/vL1/sL1/I$). */ +#define AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE (1 << 3) + +/* Set GDS_COMPUTE_MAX_WAVE_ID = DEFAULT before PACKET3_INDIRECT_BUFFER. + * This will reset wave ID counters for the IB. + */ +#define AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID (1 << 4) + +/* Flag the IB as secure (TMZ) + */ +#define AMDGPU_IB_FLAGS_SECURE (1 << 5) + +/* Tell KMD to flush and invalidate caches + */ +#define AMDGPU_IB_FLAG_EMIT_MEM_SYNC (1 << 6) + +struct drm_amdgpu_cs_chunk_ib { + __u32 _pad; + /** AMDGPU_IB_FLAG_* */ + __u32 flags; + /** Virtual address to begin IB execution */ + __u64 va_start; + /** Size of submission */ + __u32 ib_bytes; + /** HW IP to submit to */ + __u32 ip_type; + /** HW IP index of the same type to submit to */ + __u32 ip_instance; + /** Ring index to submit to */ + __u32 ring; +}; + +struct drm_amdgpu_cs_chunk_dep { + __u32 ip_type; + __u32 ip_instance; + __u32 ring; + __u32 ctx_id; + __u64 handle; +}; + +struct drm_amdgpu_cs_chunk_fence { + __u32 handle; + __u32 offset; +}; + +struct drm_amdgpu_cs_chunk_sem { + __u32 handle; +}; + +struct drm_amdgpu_cs_chunk_syncobj { + __u32 handle; + __u32 flags; + __u64 point; +}; + +#define AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ 0 +#define AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ_FD 1 +#define AMDGPU_FENCE_TO_HANDLE_GET_SYNC_FILE_FD 2 + +union drm_amdgpu_fence_to_handle { + struct { + struct drm_amdgpu_fence fence; + __u32 what; + __u32 pad; + } in; + struct { + __u32 handle; + } out; +}; + +struct drm_amdgpu_cs_chunk_data { + union { + struct drm_amdgpu_cs_chunk_ib ib_data; + struct drm_amdgpu_cs_chunk_fence fence_data; + }; +}; + +#define AMDGPU_CS_CHUNK_CP_GFX_SHADOW_FLAGS_INIT_SHADOW 0x1 + +struct drm_amdgpu_cs_chunk_cp_gfx_shadow { + __u64 shadow_va; + __u64 csa_va; + __u64 gds_va; + __u64 flags; +}; + +/* + * Query h/w info: Flag that this is integrated (a.h.a. fusion) GPU + * + */ +#define AMDGPU_IDS_FLAGS_FUSION 0x1 +#define AMDGPU_IDS_FLAGS_PREEMPTION 0x2 +#define AMDGPU_IDS_FLAGS_TMZ 0x4 +#define AMDGPU_IDS_FLAGS_CONFORMANT_TRUNC_COORD 0x8 + +/* + * Query h/w info: Flag identifying VF/PF/PT mode + * + */ +#define AMDGPU_IDS_FLAGS_MODE_MASK 0x300 +#define AMDGPU_IDS_FLAGS_MODE_SHIFT 0x8 +#define AMDGPU_IDS_FLAGS_MODE_PF 0x0 +#define AMDGPU_IDS_FLAGS_MODE_VF 0x1 +#define AMDGPU_IDS_FLAGS_MODE_PT 0x2 + +/* indicate if acceleration can be working */ +#define AMDGPU_INFO_ACCEL_WORKING 0x00 +/* get the crtc_id from the mode object id? */ +#define AMDGPU_INFO_CRTC_FROM_ID 0x01 +/* query hw IP info */ +#define AMDGPU_INFO_HW_IP_INFO 0x02 +/* query hw IP instance count for the specified type */ +#define AMDGPU_INFO_HW_IP_COUNT 0x03 +/* timestamp for GL_ARB_timer_query */ +#define AMDGPU_INFO_TIMESTAMP 0x05 +/* Query the firmware version */ +#define AMDGPU_INFO_FW_VERSION 0x0e +/* Subquery id: Query VCE firmware version */ +#define AMDGPU_INFO_FW_VCE 0x1 +/* Subquery id: Query UVD firmware version */ +#define AMDGPU_INFO_FW_UVD 0x2 +/* Subquery id: Query GMC firmware version */ +#define AMDGPU_INFO_FW_GMC 0x03 +/* Subquery id: Query GFX ME firmware version */ +#define AMDGPU_INFO_FW_GFX_ME 0x04 +/* Subquery id: Query GFX PFP firmware version */ +#define AMDGPU_INFO_FW_GFX_PFP 0x05 +/* Subquery id: Query GFX CE firmware version */ +#define AMDGPU_INFO_FW_GFX_CE 0x06 +/* Subquery id: Query GFX RLC firmware version */ +#define AMDGPU_INFO_FW_GFX_RLC 0x07 +/* Subquery id: Query GFX MEC firmware version */ +#define AMDGPU_INFO_FW_GFX_MEC 0x08 +/* Subquery id: Query SMC firmware version */ +#define AMDGPU_INFO_FW_SMC 0x0a +/* Subquery id: Query SDMA firmware version */ +#define AMDGPU_INFO_FW_SDMA 0x0b +/* Subquery id: Query PSP SOS firmware version */ +#define AMDGPU_INFO_FW_SOS 0x0c +/* Subquery id: Query PSP ASD firmware version */ +#define AMDGPU_INFO_FW_ASD 0x0d +/* Subquery id: Query VCN firmware version */ +#define AMDGPU_INFO_FW_VCN 0x0e +/* Subquery id: Query GFX RLC SRLC firmware version */ +#define AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_CNTL 0x0f +/* Subquery id: Query GFX RLC SRLG firmware version */ +#define AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_GPM_MEM 0x10 +/* Subquery id: Query GFX RLC SRLS firmware version */ +#define AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_SRM_MEM 0x11 +/* Subquery id: Query DMCU firmware version */ +#define AMDGPU_INFO_FW_DMCU 0x12 +#define AMDGPU_INFO_FW_TA 0x13 +/* Subquery id: Query DMCUB firmware version */ +#define AMDGPU_INFO_FW_DMCUB 0x14 +/* Subquery id: Query TOC firmware version */ +#define AMDGPU_INFO_FW_TOC 0x15 +/* Subquery id: Query CAP firmware version */ +#define AMDGPU_INFO_FW_CAP 0x16 +/* Subquery id: Query GFX RLCP firmware version */ +#define AMDGPU_INFO_FW_GFX_RLCP 0x17 +/* Subquery id: Query GFX RLCV firmware version */ +#define AMDGPU_INFO_FW_GFX_RLCV 0x18 +/* Subquery id: Query MES_KIQ firmware version */ +#define AMDGPU_INFO_FW_MES_KIQ 0x19 +/* Subquery id: Query MES firmware version */ +#define AMDGPU_INFO_FW_MES 0x1a +/* Subquery id: Query IMU firmware version */ +#define AMDGPU_INFO_FW_IMU 0x1b +/* Subquery id: Query VPE firmware version */ +#define AMDGPU_INFO_FW_VPE 0x1c + +/* number of bytes moved for TTM migration */ +#define AMDGPU_INFO_NUM_BYTES_MOVED 0x0f +/* the used VRAM size */ +#define AMDGPU_INFO_VRAM_USAGE 0x10 +/* the used GTT size */ +#define AMDGPU_INFO_GTT_USAGE 0x11 +/* Information about GDS, etc. resource configuration */ +#define AMDGPU_INFO_GDS_CONFIG 0x13 +/* Query information about VRAM and GTT domains */ +#define AMDGPU_INFO_VRAM_GTT 0x14 +/* Query information about register in MMR address space*/ +#define AMDGPU_INFO_READ_MMR_REG 0x15 +/* Query information about device: rev id, family, etc. */ +#define AMDGPU_INFO_DEV_INFO 0x16 +/* visible vram usage */ +#define AMDGPU_INFO_VIS_VRAM_USAGE 0x17 +/* number of TTM buffer evictions */ +#define AMDGPU_INFO_NUM_EVICTIONS 0x18 +/* Query memory about VRAM and GTT domains */ +#define AMDGPU_INFO_MEMORY 0x19 +/* Query vce clock table */ +#define AMDGPU_INFO_VCE_CLOCK_TABLE 0x1A +/* Query vbios related information */ +#define AMDGPU_INFO_VBIOS 0x1B +/* Subquery id: Query vbios size */ +#define AMDGPU_INFO_VBIOS_SIZE 0x1 +/* Subquery id: Query vbios image */ +#define AMDGPU_INFO_VBIOS_IMAGE 0x2 +/* Subquery id: Query vbios info */ +#define AMDGPU_INFO_VBIOS_INFO 0x3 +/* Query UVD handles */ +#define AMDGPU_INFO_NUM_HANDLES 0x1C +/* Query sensor related information */ +#define AMDGPU_INFO_SENSOR 0x1D +/* Subquery id: Query GPU shader clock */ +#define AMDGPU_INFO_SENSOR_GFX_SCLK 0x1 +/* Subquery id: Query GPU memory clock */ +#define AMDGPU_INFO_SENSOR_GFX_MCLK 0x2 +/* Subquery id: Query GPU temperature */ +#define AMDGPU_INFO_SENSOR_GPU_TEMP 0x3 +/* Subquery id: Query GPU load */ +#define AMDGPU_INFO_SENSOR_GPU_LOAD 0x4 +/* Subquery id: Query average GPU power */ +#define AMDGPU_INFO_SENSOR_GPU_AVG_POWER 0x5 +/* Subquery id: Query northbridge voltage */ +#define AMDGPU_INFO_SENSOR_VDDNB 0x6 +/* Subquery id: Query graphics voltage */ +#define AMDGPU_INFO_SENSOR_VDDGFX 0x7 +/* Subquery id: Query GPU stable pstate shader clock */ +#define AMDGPU_INFO_SENSOR_STABLE_PSTATE_GFX_SCLK 0x8 +/* Subquery id: Query GPU stable pstate memory clock */ +#define AMDGPU_INFO_SENSOR_STABLE_PSTATE_GFX_MCLK 0x9 +/* Subquery id: Query GPU peak pstate shader clock */ +#define AMDGPU_INFO_SENSOR_PEAK_PSTATE_GFX_SCLK 0xa +/* Subquery id: Query GPU peak pstate memory clock */ +#define AMDGPU_INFO_SENSOR_PEAK_PSTATE_GFX_MCLK 0xb +/* Subquery id: Query input GPU power */ +#define AMDGPU_INFO_SENSOR_GPU_INPUT_POWER 0xc +/* Number of VRAM page faults on CPU access. */ +#define AMDGPU_INFO_NUM_VRAM_CPU_PAGE_FAULTS 0x1E +#define AMDGPU_INFO_VRAM_LOST_COUNTER 0x1F +/* query ras mask of enabled features*/ +#define AMDGPU_INFO_RAS_ENABLED_FEATURES 0x20 +/* RAS MASK: UMC (VRAM) */ +#define AMDGPU_INFO_RAS_ENABLED_UMC (1 << 0) +/* RAS MASK: SDMA */ +#define AMDGPU_INFO_RAS_ENABLED_SDMA (1 << 1) +/* RAS MASK: GFX */ +#define AMDGPU_INFO_RAS_ENABLED_GFX (1 << 2) +/* RAS MASK: MMHUB */ +#define AMDGPU_INFO_RAS_ENABLED_MMHUB (1 << 3) +/* RAS MASK: ATHUB */ +#define AMDGPU_INFO_RAS_ENABLED_ATHUB (1 << 4) +/* RAS MASK: PCIE */ +#define AMDGPU_INFO_RAS_ENABLED_PCIE (1 << 5) +/* RAS MASK: HDP */ +#define AMDGPU_INFO_RAS_ENABLED_HDP (1 << 6) +/* RAS MASK: XGMI */ +#define AMDGPU_INFO_RAS_ENABLED_XGMI (1 << 7) +/* RAS MASK: DF */ +#define AMDGPU_INFO_RAS_ENABLED_DF (1 << 8) +/* RAS MASK: SMN */ +#define AMDGPU_INFO_RAS_ENABLED_SMN (1 << 9) +/* RAS MASK: SEM */ +#define AMDGPU_INFO_RAS_ENABLED_SEM (1 << 10) +/* RAS MASK: MP0 */ +#define AMDGPU_INFO_RAS_ENABLED_MP0 (1 << 11) +/* RAS MASK: MP1 */ +#define AMDGPU_INFO_RAS_ENABLED_MP1 (1 << 12) +/* RAS MASK: FUSE */ +#define AMDGPU_INFO_RAS_ENABLED_FUSE (1 << 13) +/* query video encode/decode caps */ +#define AMDGPU_INFO_VIDEO_CAPS 0x21 +/* Subquery id: Decode */ +#define AMDGPU_INFO_VIDEO_CAPS_DECODE 0 +/* Subquery id: Encode */ +#define AMDGPU_INFO_VIDEO_CAPS_ENCODE 1 +/* Query the max number of IBs per gang per submission */ +#define AMDGPU_INFO_MAX_IBS 0x22 +/* query last page fault info */ +#define AMDGPU_INFO_GPUVM_FAULT 0x23 +/* query FW object size and alignment */ +#define AMDGPU_INFO_UQ_FW_AREAS 0x24 + +#define AMDGPU_INFO_MMR_SE_INDEX_SHIFT 0 +#define AMDGPU_INFO_MMR_SE_INDEX_MASK 0xff +#define AMDGPU_INFO_MMR_SH_INDEX_SHIFT 8 +#define AMDGPU_INFO_MMR_SH_INDEX_MASK 0xff + +struct drm_amdgpu_query_fw { + /** AMDGPU_INFO_FW_* */ + __u32 fw_type; + /** + * Index of the IP if there are more IPs of + * the same type. + */ + __u32 ip_instance; + /** + * Index of the engine. Whether this is used depends + * on the firmware type. (e.g. MEC, SDMA) + */ + __u32 index; + __u32 _pad; +}; + +/* Input structure for the INFO ioctl */ +struct drm_amdgpu_info { + /* Where the return value will be stored */ + __u64 return_pointer; + /* The size of the return value. Just like "size" in "snprintf", + * it limits how many bytes the kernel can write. */ + __u32 return_size; + /* The query request id. */ + __u32 query; + + union { + struct { + __u32 id; + __u32 _pad; + } mode_crtc; + + struct { + /** AMDGPU_HW_IP_* */ + __u32 type; + /** + * Index of the IP if there are more IPs of the same + * type. Ignored by AMDGPU_INFO_HW_IP_COUNT. + */ + __u32 ip_instance; + } query_hw_ip; + + struct { + __u32 dword_offset; + /** number of registers to read */ + __u32 count; + __u32 instance; + /** For future use, no flags defined so far */ + __u32 flags; + } read_mmr_reg; + + struct drm_amdgpu_query_fw query_fw; + + struct { + __u32 type; + __u32 offset; + } vbios_info; + + struct { + __u32 type; + } sensor_info; + + struct { + __u32 type; + } video_cap; + }; +}; + +struct drm_amdgpu_info_gds { + /** GDS GFX partition size */ + __u32 gds_gfx_partition_size; + /** GDS compute partition size */ + __u32 compute_partition_size; + /** total GDS memory size */ + __u32 gds_total_size; + /** GWS size per GFX partition */ + __u32 gws_per_gfx_partition; + /** GSW size per compute partition */ + __u32 gws_per_compute_partition; + /** OA size per GFX partition */ + __u32 oa_per_gfx_partition; + /** OA size per compute partition */ + __u32 oa_per_compute_partition; + __u32 _pad; +}; + +struct drm_amdgpu_info_vram_gtt { + __u64 vram_size; + __u64 vram_cpu_accessible_size; + __u64 gtt_size; +}; + +struct drm_amdgpu_heap_info { + /** max. physical memory */ + __u64 total_heap_size; + + /** Theoretical max. available memory in the given heap */ + __u64 usable_heap_size; + + /** + * Number of bytes allocated in the heap. This includes all processes + * and private allocations in the kernel. It changes when new buffers + * are allocated, freed, and moved. It cannot be larger than + * heap_size. + */ + __u64 heap_usage; + + /** + * Theoretical possible max. size of buffer which + * could be allocated in the given heap + */ + __u64 max_allocation; +}; + +struct drm_amdgpu_memory_info { + struct drm_amdgpu_heap_info vram; + struct drm_amdgpu_heap_info cpu_accessible_vram; + struct drm_amdgpu_heap_info gtt; +}; + +struct drm_amdgpu_info_firmware { + __u32 ver; + __u32 feature; +}; + +struct drm_amdgpu_info_vbios { + __u8 name[64]; + __u8 vbios_pn[64]; + __u32 version; + __u32 pad; + __u8 vbios_ver_str[32]; + __u8 date[32]; +}; + +#define AMDGPU_VRAM_TYPE_UNKNOWN 0 +#define AMDGPU_VRAM_TYPE_GDDR1 1 +#define AMDGPU_VRAM_TYPE_DDR2 2 +#define AMDGPU_VRAM_TYPE_GDDR3 3 +#define AMDGPU_VRAM_TYPE_GDDR4 4 +#define AMDGPU_VRAM_TYPE_GDDR5 5 +#define AMDGPU_VRAM_TYPE_HBM 6 +#define AMDGPU_VRAM_TYPE_DDR3 7 +#define AMDGPU_VRAM_TYPE_DDR4 8 +#define AMDGPU_VRAM_TYPE_GDDR6 9 +#define AMDGPU_VRAM_TYPE_DDR5 10 +#define AMDGPU_VRAM_TYPE_LPDDR4 11 +#define AMDGPU_VRAM_TYPE_LPDDR5 12 +#define AMDGPU_VRAM_TYPE_HBM3E 13 + +struct drm_amdgpu_info_device { + /** PCI Device ID */ + __u32 device_id; + /** Internal chip revision: A0, A1, etc.) */ + __u32 chip_rev; + __u32 external_rev; + /** Revision id in PCI Config space */ + __u32 pci_rev; + __u32 family; + __u32 num_shader_engines; + __u32 num_shader_arrays_per_engine; + /* in KHz */ + __u32 gpu_counter_freq; + __u64 max_engine_clock; + __u64 max_memory_clock; + /* cu information */ + __u32 cu_active_number; + /* NOTE: cu_ao_mask is INVALID, DON'T use it */ + __u32 cu_ao_mask; + __u32 cu_bitmap[4][4]; + /** Render backend pipe mask. One render backend is CB+DB. */ + __u32 enabled_rb_pipes_mask; + __u32 num_rb_pipes; + __u32 num_hw_gfx_contexts; + /* PCIe version (the smaller of the GPU and the CPU/motherboard) */ + __u32 pcie_gen; + __u64 ids_flags; + /** Starting virtual address for UMDs. */ + __u64 virtual_address_offset; + /** The maximum virtual address */ + __u64 virtual_address_max; + /** Required alignment of virtual addresses. */ + __u32 virtual_address_alignment; + /** Page table entry - fragment size */ + __u32 pte_fragment_size; + __u32 gart_page_size; + /** constant engine ram size*/ + __u32 ce_ram_size; + /** video memory type info*/ + __u32 vram_type; + /** video memory bit width*/ + __u32 vram_bit_width; + /* vce harvesting instance */ + __u32 vce_harvest_config; + /* gfx double offchip LDS buffers */ + __u32 gc_double_offchip_lds_buf; + /* NGG Primitive Buffer */ + __u64 prim_buf_gpu_addr; + /* NGG Position Buffer */ + __u64 pos_buf_gpu_addr; + /* NGG Control Sideband */ + __u64 cntl_sb_buf_gpu_addr; + /* NGG Parameter Cache */ + __u64 param_buf_gpu_addr; + __u32 prim_buf_size; + __u32 pos_buf_size; + __u32 cntl_sb_buf_size; + __u32 param_buf_size; + /* wavefront size*/ + __u32 wave_front_size; + /* shader visible vgprs*/ + __u32 num_shader_visible_vgprs; + /* CU per shader array*/ + __u32 num_cu_per_sh; + /* number of tcc blocks*/ + __u32 num_tcc_blocks; + /* gs vgt table depth*/ + __u32 gs_vgt_table_depth; + /* gs primitive buffer depth*/ + __u32 gs_prim_buffer_depth; + /* max gs wavefront per vgt*/ + __u32 max_gs_waves_per_vgt; + /* PCIe number of lanes (the smaller of the GPU and the CPU/motherboard) + */ + __u32 pcie_num_lanes; + /* always on cu bitmap */ + __u32 cu_ao_bitmap[4][4]; + /** Starting high virtual address for UMDs. */ + __u64 high_va_offset; + /** The maximum high virtual address */ + __u64 high_va_max; + /* gfx10 pa_sc_tile_steering_override */ + __u32 pa_sc_tile_steering_override; + /* disabled TCCs */ + __u64 tcc_disabled_mask; + __u64 min_engine_clock; + __u64 min_memory_clock; + /* The following fields are only set on gfx11+, older chips set 0. */ + __u32 tcp_cache_size; /* AKA GL0, VMEM cache */ + __u32 num_sqc_per_wgp; + __u32 sqc_data_cache_size; /* AKA SMEM cache */ + __u32 sqc_inst_cache_size; + __u32 gl1c_cache_size; + __u32 gl2c_cache_size; + __u64 mall_size; /* AKA infinity cache */ + /* high 32 bits of the rb pipes mask */ + __u32 enabled_rb_pipes_mask_hi; + /* shadow area size for gfx11 */ + __u32 shadow_size; + /* shadow area base virtual alignment for gfx11 */ + __u32 shadow_alignment; + /* context save area size for gfx11 */ + __u32 csa_size; + /* context save area base virtual alignment for gfx11 */ + __u32 csa_alignment; + /* Userq IP mask (1 << AMDGPU_HW_IP_*) */ + __u32 userq_ip_mask; + __u32 pad; +}; + +struct drm_amdgpu_info_hw_ip { + /** Version of h/w IP */ + __u32 hw_ip_version_major; + __u32 hw_ip_version_minor; + /** Capabilities */ + __u64 capabilities_flags; + /** command buffer address start alignment*/ + __u32 ib_start_alignment; + /** command buffer size alignment*/ + __u32 ib_size_alignment; + /** Bitmask of available rings. Bit 0 means ring 0, etc. */ + __u32 available_rings; + /** version info: bits 23:16 major, 15:8 minor, 7:0 revision */ + __u32 ip_discovery_version; + /* Userq available slots */ + __u32 userq_num_slots; +}; + +struct drm_amdgpu_info_num_handles { + /** Max handles as supported by firmware for UVD */ + __u32 uvd_max_handles; + /** Handles currently in use for UVD */ + __u32 uvd_used_handles; +}; + +#define AMDGPU_VCE_CLOCK_TABLE_ENTRIES 6 + +struct drm_amdgpu_info_vce_clock_table_entry { + /** System clock */ + __u32 sclk; + /** Memory clock */ + __u32 mclk; + /** VCE clock */ + __u32 eclk; + __u32 pad; +}; + +struct drm_amdgpu_info_vce_clock_table { + struct drm_amdgpu_info_vce_clock_table_entry + entries[AMDGPU_VCE_CLOCK_TABLE_ENTRIES]; + __u32 num_valid_entries; + __u32 pad; +}; + +/* query video encode/decode caps */ +#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2 0 +#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4 1 +#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1 2 +#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC 3 +#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC 4 +#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG 5 +#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9 6 +#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1 7 +#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_COUNT 8 + +struct drm_amdgpu_info_video_codec_info { + __u32 valid; + __u32 max_width; + __u32 max_height; + __u32 max_pixels_per_frame; + __u32 max_level; + __u32 pad; +}; + +struct drm_amdgpu_info_video_caps { + struct drm_amdgpu_info_video_codec_info + codec_info[AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_COUNT]; +}; + +#define AMDGPU_VMHUB_TYPE_MASK 0xff +#define AMDGPU_VMHUB_TYPE_SHIFT 0 +#define AMDGPU_VMHUB_TYPE_GFX 0 +#define AMDGPU_VMHUB_TYPE_MM0 1 +#define AMDGPU_VMHUB_TYPE_MM1 2 +#define AMDGPU_VMHUB_IDX_MASK 0xff00 +#define AMDGPU_VMHUB_IDX_SHIFT 8 + +struct drm_amdgpu_info_gpuvm_fault { + __u64 addr; + __u32 status; + __u32 vmhub; +}; + +struct drm_amdgpu_info_uq_metadata_gfx { + /* shadow area size for gfx11 */ + __u32 shadow_size; + /* shadow area base virtual alignment for gfx11 */ + __u32 shadow_alignment; + /* context save area size for gfx11 */ + __u32 csa_size; + /* context save area base virtual alignment for gfx11 */ + __u32 csa_alignment; +}; + +struct drm_amdgpu_info_uq_metadata { + union { + struct drm_amdgpu_info_uq_metadata_gfx gfx; + }; +}; + +/* + * Supported GPU families + */ +#define AMDGPU_FAMILY_UNKNOWN 0 +#define AMDGPU_FAMILY_SI 110 /* Hainan, Oland, Verde, Pitcairn, Tahiti */ +#define AMDGPU_FAMILY_CI 120 /* Bonaire, Hawaii */ +#define AMDGPU_FAMILY_KV 125 /* Kaveri, Kabini, Mullins */ +#define AMDGPU_FAMILY_VI 130 /* Iceland, Tonga */ +#define AMDGPU_FAMILY_CZ 135 /* Carrizo, Stoney */ +#define AMDGPU_FAMILY_AI 141 /* Vega10 */ +#define AMDGPU_FAMILY_RV 142 /* Raven */ +#define AMDGPU_FAMILY_NV 143 /* Navi10 */ +#define AMDGPU_FAMILY_VGH 144 /* Van Gogh */ +#define AMDGPU_FAMILY_GC_11_0_0 145 /* GC 11.0.0 */ +#define AMDGPU_FAMILY_YC 146 /* Yellow Carp */ +#define AMDGPU_FAMILY_GC_11_0_1 148 /* GC 11.0.1 */ +#define AMDGPU_FAMILY_GC_10_3_6 149 /* GC 10.3.6 */ +#define AMDGPU_FAMILY_GC_10_3_7 151 /* GC 10.3.7 */ +#define AMDGPU_FAMILY_GC_11_5_0 150 /* GC 11.5.0 */ +#define AMDGPU_FAMILY_GC_12_0_0 152 /* GC 12.0.0 */ + +/* FIXME wrong namespace! */ +struct drm_color_ctm_3x4 { + /* + * Conversion matrix with 3x4 dimensions in S31.32 sign-magnitude + * (not two's complement!) format. + */ + __u64 matrix[12]; +}; + +#if defined(__cplusplus) +} +#endif + +#endif diff --git a/include/arch/x86_64/drm/amdxdna_accel.h b/include/arch/x86_64/drm/amdxdna_accel.h new file mode 100644 index 00000000..75b74e1d --- /dev/null +++ b/include/arch/x86_64/drm/amdxdna_accel.h @@ -0,0 +1,507 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Copyright (C) 2022-2024, Advanced Micro Devices, Inc. + */ + +#ifndef _AMDXDNA_ACCEL_H_ +#define _AMDXDNA_ACCEL_H_ + +#include <linux/stddef.h> +#include "drm.h" + +#if defined(__cplusplus) +extern "C" { +#endif + +#define AMDXDNA_INVALID_CMD_HANDLE (~0UL) +#define AMDXDNA_INVALID_ADDR (~0UL) +#define AMDXDNA_INVALID_CTX_HANDLE 0 +#define AMDXDNA_INVALID_BO_HANDLE 0 +#define AMDXDNA_INVALID_FENCE_HANDLE 0 + +enum amdxdna_device_type { + AMDXDNA_DEV_TYPE_UNKNOWN = -1, + AMDXDNA_DEV_TYPE_KMQ, +}; + +enum amdxdna_drm_ioctl_id { + DRM_AMDXDNA_CREATE_HWCTX, + DRM_AMDXDNA_DESTROY_HWCTX, + DRM_AMDXDNA_CONFIG_HWCTX, + DRM_AMDXDNA_CREATE_BO, + DRM_AMDXDNA_GET_BO_INFO, + DRM_AMDXDNA_SYNC_BO, + DRM_AMDXDNA_EXEC_CMD, + DRM_AMDXDNA_GET_INFO, + DRM_AMDXDNA_SET_STATE, +}; + +/** + * struct qos_info - QoS information for driver. + * @gops: Giga operations per second. + * @fps: Frames per second. + * @dma_bandwidth: DMA bandwidtha. + * @latency: Frame response latency. + * @frame_exec_time: Frame execution time. + * @priority: Request priority. + * + * User program can provide QoS hints to driver. + */ +struct amdxdna_qos_info { + __u32 gops; + __u32 fps; + __u32 dma_bandwidth; + __u32 latency; + __u32 frame_exec_time; + __u32 priority; +}; + +/** + * struct amdxdna_drm_create_hwctx - Create hardware context. + * @ext: MBZ. + * @ext_flags: MBZ. + * @qos_p: Address of QoS info. + * @umq_bo: BO handle for user mode queue(UMQ). + * @log_buf_bo: BO handle for log buffer. + * @max_opc: Maximum operations per cycle. + * @num_tiles: Number of AIE tiles. + * @mem_size: Size of AIE tile memory. + * @umq_doorbell: Returned offset of doorbell associated with UMQ. + * @handle: Returned hardware context handle. + * @syncobj_handle: Returned syncobj handle for command completion. + */ +struct amdxdna_drm_create_hwctx { + __u64 ext; + __u64 ext_flags; + __u64 qos_p; + __u32 umq_bo; + __u32 log_buf_bo; + __u32 max_opc; + __u32 num_tiles; + __u32 mem_size; + __u32 umq_doorbell; + __u32 handle; + __u32 syncobj_handle; +}; + +/** + * struct amdxdna_drm_destroy_hwctx - Destroy hardware context. + * @handle: Hardware context handle. + * @pad: MBZ. + */ +struct amdxdna_drm_destroy_hwctx { + __u32 handle; + __u32 pad; +}; + +/** + * struct amdxdna_cu_config - configuration for one CU + * @cu_bo: CU configuration buffer bo handle. + * @cu_func: Function of a CU. + * @pad: MBZ. + */ +struct amdxdna_cu_config { + __u32 cu_bo; + __u8 cu_func; + __u8 pad[3]; +}; + +/** + * struct amdxdna_hwctx_param_config_cu - configuration for CUs in hardware + * context + * @num_cus: Number of CUs to configure. + * @pad: MBZ. + * @cu_configs: Array of CU configurations of struct amdxdna_cu_config. + */ +struct amdxdna_hwctx_param_config_cu { + __u16 num_cus; + __u16 pad[3]; + struct amdxdna_cu_config cu_configs[] __counted_by(num_cus); +}; + +enum amdxdna_drm_config_hwctx_param { + DRM_AMDXDNA_HWCTX_CONFIG_CU, + DRM_AMDXDNA_HWCTX_ASSIGN_DBG_BUF, + DRM_AMDXDNA_HWCTX_REMOVE_DBG_BUF, +}; + +/** + * struct amdxdna_drm_config_hwctx - Configure hardware context. + * @handle: hardware context handle. + * @param_type: Value in enum amdxdna_drm_config_hwctx_param. Specifies the + * structure passed in via param_val. + * @param_val: A structure specified by the param_type struct member. + * @param_val_size: Size of the parameter buffer pointed to by the param_val. + * If param_val is not a pointer, driver can ignore this. + * @pad: MBZ. + * + * Note: if the param_val is a pointer pointing to a buffer, the maximum size + * of the buffer is 4KiB(PAGE_SIZE). + */ +struct amdxdna_drm_config_hwctx { + __u32 handle; + __u32 param_type; + __u64 param_val; + __u32 param_val_size; + __u32 pad; +}; + +enum amdxdna_bo_type { + AMDXDNA_BO_INVALID = 0, + AMDXDNA_BO_SHMEM, + AMDXDNA_BO_DEV_HEAP, + AMDXDNA_BO_DEV, + AMDXDNA_BO_CMD, +}; + +/** + * struct amdxdna_drm_create_bo - Create a buffer object. + * @flags: Buffer flags. MBZ. + * @vaddr: User VA of buffer if applied. MBZ. + * @size: Size in bytes. + * @type: Buffer type. + * @handle: Returned DRM buffer object handle. + */ +struct amdxdna_drm_create_bo { + __u64 flags; + __u64 vaddr; + __u64 size; + __u32 type; + __u32 handle; +}; + +/** + * struct amdxdna_drm_get_bo_info - Get buffer object information. + * @ext: MBZ. + * @ext_flags: MBZ. + * @handle: DRM buffer object handle. + * @pad: MBZ. + * @map_offset: Returned DRM fake offset for mmap(). + * @vaddr: Returned user VA of buffer. 0 in case user needs mmap(). + * @xdna_addr: Returned XDNA device virtual address. + */ +struct amdxdna_drm_get_bo_info { + __u64 ext; + __u64 ext_flags; + __u32 handle; + __u32 pad; + __u64 map_offset; + __u64 vaddr; + __u64 xdna_addr; +}; + +/** + * struct amdxdna_drm_sync_bo - Sync buffer object. + * @handle: Buffer object handle. + * @direction: Direction of sync, can be from device or to device. + * @offset: Offset in the buffer to sync. + * @size: Size in bytes. + */ +struct amdxdna_drm_sync_bo { + __u32 handle; +#define SYNC_DIRECT_TO_DEVICE 0U +#define SYNC_DIRECT_FROM_DEVICE 1U + __u32 direction; + __u64 offset; + __u64 size; +}; + +enum amdxdna_cmd_type { + AMDXDNA_CMD_SUBMIT_EXEC_BUF = 0, + AMDXDNA_CMD_SUBMIT_DEPENDENCY, + AMDXDNA_CMD_SUBMIT_SIGNAL, +}; + +/** + * struct amdxdna_drm_exec_cmd - Execute command. + * @ext: MBZ. + * @ext_flags: MBZ. + * @hwctx: Hardware context handle. + * @type: One of command type in enum amdxdna_cmd_type. + * @cmd_handles: Array of command handles or the command handle itself + * in case of just one. + * @args: Array of arguments for all command handles. + * @cmd_count: Number of command handles in the cmd_handles array. + * @arg_count: Number of arguments in the args array. + * @seq: Returned sequence number for this command. + */ +struct amdxdna_drm_exec_cmd { + __u64 ext; + __u64 ext_flags; + __u32 hwctx; + __u32 type; + __u64 cmd_handles; + __u64 args; + __u32 cmd_count; + __u32 arg_count; + __u64 seq; +}; + +/** + * struct amdxdna_drm_query_aie_status - Query the status of the AIE hardware + * @buffer: The user space buffer that will return the AIE status. + * @buffer_size: The size of the user space buffer. + * @cols_filled: A bitmap of AIE columns whose data has been returned in the + * buffer. + */ +struct amdxdna_drm_query_aie_status { + __u64 buffer; /* out */ + __u32 buffer_size; /* in */ + __u32 cols_filled; /* out */ +}; + +/** + * struct amdxdna_drm_query_aie_version - Query the version of the AIE hardware + * @major: The major version number. + * @minor: The minor version number. + */ +struct amdxdna_drm_query_aie_version { + __u32 major; /* out */ + __u32 minor; /* out */ +}; + +/** + * struct amdxdna_drm_query_aie_tile_metadata - Query the metadata of AIE tile + * (core, mem, shim) + * @row_count: The number of rows. + * @row_start: The starting row number. + * @dma_channel_count: The number of dma channels. + * @lock_count: The number of locks. + * @event_reg_count: The number of events. + * @pad: Structure padding. + */ +struct amdxdna_drm_query_aie_tile_metadata { + __u16 row_count; + __u16 row_start; + __u16 dma_channel_count; + __u16 lock_count; + __u16 event_reg_count; + __u16 pad[3]; +}; + +/** + * struct amdxdna_drm_query_aie_metadata - Query the metadata of the AIE + * hardware + * @col_size: The size of a column in bytes. + * @cols: The total number of columns. + * @rows: The total number of rows. + * @version: The version of the AIE hardware. + * @core: The metadata for all core tiles. + * @mem: The metadata for all mem tiles. + * @shim: The metadata for all shim tiles. + */ +struct amdxdna_drm_query_aie_metadata { + __u32 col_size; + __u16 cols; + __u16 rows; + struct amdxdna_drm_query_aie_version version; + struct amdxdna_drm_query_aie_tile_metadata core; + struct amdxdna_drm_query_aie_tile_metadata mem; + struct amdxdna_drm_query_aie_tile_metadata shim; +}; + +/** + * struct amdxdna_drm_query_clock - Metadata for a clock + * @name: The clock name. + * @freq_mhz: The clock frequency. + * @pad: Structure padding. + */ +struct amdxdna_drm_query_clock { + __u8 name[16]; + __u32 freq_mhz; + __u32 pad; +}; + +/** + * struct amdxdna_drm_query_clock_metadata - Query metadata for clocks + * @mp_npu_clock: The metadata for MP-NPU clock. + * @h_clock: The metadata for H clock. + */ +struct amdxdna_drm_query_clock_metadata { + struct amdxdna_drm_query_clock mp_npu_clock; + struct amdxdna_drm_query_clock h_clock; +}; + +enum amdxdna_sensor_type { AMDXDNA_SENSOR_TYPE_POWER }; + +/** + * struct amdxdna_drm_query_sensor - The data for single sensor. + * @label: The name for a sensor. + * @input: The current value of the sensor. + * @max: The maximum value possible for the sensor. + * @average: The average value of the sensor. + * @highest: The highest recorded sensor value for this driver load for the + * sensor. + * @status: The sensor status. + * @units: The sensor units. + * @unitm: Translates value member variables into the correct unit via (pow(10, + * unitm) * value). + * @type: The sensor type from enum amdxdna_sensor_type. + * @pad: Structure padding. + */ +struct amdxdna_drm_query_sensor { + __u8 label[64]; + __u32 input; + __u32 max; + __u32 average; + __u32 highest; + __u8 status[64]; + __u8 units[16]; + __s8 unitm; + __u8 type; + __u8 pad[6]; +}; + +/** + * struct amdxdna_drm_query_hwctx - The data for single context. + * @context_id: The ID for this context. + * @start_col: The starting column for the partition assigned to this context. + * @num_col: The number of columns in the partition assigned to this context. + * @pad: Structure padding. + * @pid: The Process ID of the process that created this context. + * @command_submissions: The number of commands submitted to this context. + * @command_completions: The number of commands completed by this context. + * @migrations: The number of times this context has been moved to a different + * partition. + * @preemptions: The number of times this context has been preempted by another + * context in the same partition. + * @errors: The errors for this context. + */ +struct amdxdna_drm_query_hwctx { + __u32 context_id; + __u32 start_col; + __u32 num_col; + __u32 pad; + __s64 pid; + __u64 command_submissions; + __u64 command_completions; + __u64 migrations; + __u64 preemptions; + __u64 errors; +}; + +enum amdxdna_power_mode_type { + POWER_MODE_DEFAULT, /* Fallback to calculated DPM */ + POWER_MODE_LOW, /* Set frequency to lowest DPM */ + POWER_MODE_MEDIUM, /* Set frequency to medium DPM */ + POWER_MODE_HIGH, /* Set frequency to highest DPM */ + POWER_MODE_TURBO, /* Maximum power */ +}; + +/** + * struct amdxdna_drm_get_power_mode - Get the configured power mode + * @power_mode: The mode type from enum amdxdna_power_mode_type + * @pad: Structure padding. + */ +struct amdxdna_drm_get_power_mode { + __u8 power_mode; + __u8 pad[7]; +}; + +/** + * struct amdxdna_drm_query_firmware_version - Query the firmware version + * @major: The major version number + * @minor: The minor version number + * @patch: The patch level version number + * @build: The build ID + */ +struct amdxdna_drm_query_firmware_version { + __u32 major; /* out */ + __u32 minor; /* out */ + __u32 patch; /* out */ + __u32 build; /* out */ +}; + +enum amdxdna_drm_get_param { + DRM_AMDXDNA_QUERY_AIE_STATUS, + DRM_AMDXDNA_QUERY_AIE_METADATA, + DRM_AMDXDNA_QUERY_AIE_VERSION, + DRM_AMDXDNA_QUERY_CLOCK_METADATA, + DRM_AMDXDNA_QUERY_SENSORS, + DRM_AMDXDNA_QUERY_HW_CONTEXTS, + DRM_AMDXDNA_QUERY_FIRMWARE_VERSION = 8, + DRM_AMDXDNA_GET_POWER_MODE, +}; + +/** + * struct amdxdna_drm_get_info - Get some information from the AIE hardware. + * @param: Value in enum amdxdna_drm_get_param. Specifies the structure passed + * in the buffer. + * @buffer_size: Size of the input buffer. Size needed/written by the kernel. + * @buffer: A structure specified by the param struct member. + */ +struct amdxdna_drm_get_info { + __u32 param; /* in */ + __u32 buffer_size; /* in/out */ + __u64 buffer; /* in/out */ +}; + +enum amdxdna_drm_set_param { + DRM_AMDXDNA_SET_POWER_MODE, + DRM_AMDXDNA_WRITE_AIE_MEM, + DRM_AMDXDNA_WRITE_AIE_REG, +}; + +/** + * struct amdxdna_drm_set_state - Set the state of the AIE hardware. + * @param: Value in enum amdxdna_drm_set_param. + * @buffer_size: Size of the input param. + * @buffer: Pointer to the input param. + */ +struct amdxdna_drm_set_state { + __u32 param; /* in */ + __u32 buffer_size; /* in */ + __u64 buffer; /* in */ +}; + +/** + * struct amdxdna_drm_set_power_mode - Set the power mode of the AIE hardware + * @power_mode: The sensor type from enum amdxdna_power_mode_type + * @pad: MBZ. + */ +struct amdxdna_drm_set_power_mode { + __u8 power_mode; + __u8 pad[7]; +}; + +#define DRM_IOCTL_AMDXDNA_CREATE_HWCTX \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_CREATE_HWCTX, \ + struct amdxdna_drm_create_hwctx) + +#define DRM_IOCTL_AMDXDNA_DESTROY_HWCTX \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_DESTROY_HWCTX, \ + struct amdxdna_drm_destroy_hwctx) + +#define DRM_IOCTL_AMDXDNA_CONFIG_HWCTX \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_CONFIG_HWCTX, \ + struct amdxdna_drm_config_hwctx) + +#define DRM_IOCTL_AMDXDNA_CREATE_BO \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_CREATE_BO, \ + struct amdxdna_drm_create_bo) + +#define DRM_IOCTL_AMDXDNA_GET_BO_INFO \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_GET_BO_INFO, \ + struct amdxdna_drm_get_bo_info) + +#define DRM_IOCTL_AMDXDNA_SYNC_BO \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_SYNC_BO, \ + struct amdxdna_drm_sync_bo) + +#define DRM_IOCTL_AMDXDNA_EXEC_CMD \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_EXEC_CMD, \ + struct amdxdna_drm_exec_cmd) + +#define DRM_IOCTL_AMDXDNA_GET_INFO \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_GET_INFO, \ + struct amdxdna_drm_get_info) + +#define DRM_IOCTL_AMDXDNA_SET_STATE \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_SET_STATE, \ + struct amdxdna_drm_set_state) + +#if defined(__cplusplus) +} /* extern c end */ +#endif + +#endif /* _AMDXDNA_ACCEL_H_ */ diff --git a/include/arch/x86_64/drm/armada_drm.h b/include/arch/x86_64/drm/armada_drm.h new file mode 100644 index 00000000..449c9b4f --- /dev/null +++ b/include/arch/x86_64/drm/armada_drm.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Copyright (C) 2012 Russell King + * With inspiration from the i915 driver + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef DRM_ARMADA_IOCTL_H +#define DRM_ARMADA_IOCTL_H + +#include "drm.h" + +#if defined(__cplusplus) +extern "C" { +#endif + +#define DRM_ARMADA_GEM_CREATE 0x00 +#define DRM_ARMADA_GEM_MMAP 0x02 +#define DRM_ARMADA_GEM_PWRITE 0x03 + +#define ARMADA_IOCTL(dir, name, str) \ + DRM_##dir(DRM_COMMAND_BASE + DRM_ARMADA_##name, struct drm_armada_##str) + +struct drm_armada_gem_create { + __u32 handle; + __u32 size; +}; +#define DRM_IOCTL_ARMADA_GEM_CREATE ARMADA_IOCTL(IOWR, GEM_CREATE, gem_create) + +struct drm_armada_gem_mmap { + __u32 handle; + __u32 pad; + __u64 offset; + __u64 size; + __u64 addr; +}; +#define DRM_IOCTL_ARMADA_GEM_MMAP ARMADA_IOCTL(IOWR, GEM_MMAP, gem_mmap) + +struct drm_armada_gem_pwrite { + __u64 ptr; + __u32 handle; + __u32 offset; + __u32 size; +}; +#define DRM_IOCTL_ARMADA_GEM_PWRITE ARMADA_IOCTL(IOW, GEM_PWRITE, gem_pwrite) + +#if defined(__cplusplus) +} +#endif + +#endif diff --git a/include/arch/x86_64/drm/asahi_drm.h b/include/arch/x86_64/drm/asahi_drm.h new file mode 100644 index 00000000..bda7ce43 --- /dev/null +++ b/include/arch/x86_64/drm/asahi_drm.h @@ -0,0 +1,1199 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright (C) The Asahi Linux Contributors + * Copyright (C) 2018-2023 Collabora Ltd. + * Copyright (C) 2014-2018 Broadcom + */ +#ifndef _ASAHI_DRM_H_ +#define _ASAHI_DRM_H_ + +#include "drm.h" + +#if defined(__cplusplus) +extern "C" { +#endif + +/** + * DOC: Introduction to the Asahi UAPI + * + * This documentation describes the Asahi IOCTLs. + * + * Just a few generic rules about the data passed to the Asahi IOCTLs (cribbed + * from Panthor): + * + * - Structures must be aligned on 64-bit/8-byte. If the object is not + * naturally aligned, a padding field must be added. + * - Fields must be explicitly aligned to their natural type alignment with + * pad[0..N] fields. + * - All padding fields will be checked by the driver to make sure they are + * zeroed. + * - Flags can be added, but not removed/replaced. + * - New fields can be added to the main structures (the structures + * directly passed to the ioctl). Those fields can be added at the end of + * the structure, or replace existing padding fields. Any new field being + * added must preserve the behavior that existed before those fields were + * added when a value of zero is passed. + * - New fields can be added to indirect objects (objects pointed by the + * main structure), iff those objects are passed a size to reflect the + * size known by the userspace driver (see + * drm_asahi_cmd_header::size). + * - If the kernel driver is too old to know some fields, those will be + * ignored if zero, and otherwise rejected (and so will be zero on output). + * - If userspace is too old to know some fields, those will be zeroed + * (input) before the structure is parsed by the kernel driver. + * - Each new flag/field addition must come with a driver version update so + * the userspace driver doesn't have to guess which flags are supported. + * - Structures should not contain unions, as this would defeat the + * extensibility of such structures. + * - IOCTLs can't be removed or replaced. New IOCTL IDs should be placed + * at the end of the drm_asahi_ioctl_id enum. + */ + +/** + * enum drm_asahi_ioctl_id - IOCTL IDs + * + * Place new ioctls at the end, don't re-order, don't replace or remove entries. + * + * These IDs are not meant to be used directly. Use the DRM_IOCTL_ASAHI_xxx + * definitions instead. + */ +enum drm_asahi_ioctl_id { + /** @DRM_ASAHI_GET_PARAMS: Query device properties. */ + DRM_ASAHI_GET_PARAMS = 0, + + /** @DRM_ASAHI_GET_TIME: Query device time. */ + DRM_ASAHI_GET_TIME, + + /** @DRM_ASAHI_VM_CREATE: Create a GPU VM address space. */ + DRM_ASAHI_VM_CREATE, + + /** @DRM_ASAHI_VM_DESTROY: Destroy a VM. */ + DRM_ASAHI_VM_DESTROY, + + /** @DRM_ASAHI_VM_BIND: Bind/unbind memory to a VM. */ + DRM_ASAHI_VM_BIND, + + /** @DRM_ASAHI_GEM_CREATE: Create a buffer object. */ + DRM_ASAHI_GEM_CREATE, + + /** + * @DRM_ASAHI_GEM_MMAP_OFFSET: Get offset to pass to mmap() to map a + * given GEM handle. + */ + DRM_ASAHI_GEM_MMAP_OFFSET, + + /** @DRM_ASAHI_GEM_BIND_OBJECT: Bind memory as a special object */ + DRM_ASAHI_GEM_BIND_OBJECT, + + /** @DRM_ASAHI_QUEUE_CREATE: Create a scheduling queue. */ + DRM_ASAHI_QUEUE_CREATE, + + /** @DRM_ASAHI_QUEUE_DESTROY: Destroy a scheduling queue. */ + DRM_ASAHI_QUEUE_DESTROY, + + /** @DRM_ASAHI_SUBMIT: Submit commands to a queue. */ + DRM_ASAHI_SUBMIT, +}; + +#define DRM_ASAHI_MAX_CLUSTERS 64 + +/** + * struct drm_asahi_params_global - Global parameters. + * + * This struct may be queried by drm_asahi_get_params. + */ +struct drm_asahi_params_global { + /** @features: Feature bits from drm_asahi_feature */ + __u64 features; + + /** @gpu_generation: GPU generation, e.g. 13 for G13G */ + __u32 gpu_generation; + + /** @gpu_variant: GPU variant as a character, e.g. 'C' for G13C */ + __u32 gpu_variant; + + /** + * @gpu_revision: GPU revision in BCD, e.g. 0x00 for 'A0' or + * 0x21 for 'C1' + */ + __u32 gpu_revision; + + /** @chip_id: Chip ID in BCD, e.g. 0x8103 for T8103 */ + __u32 chip_id; + + /** @num_dies: Number of dies in the SoC */ + __u32 num_dies; + + /** @num_clusters_total: Number of GPU clusters (across all dies) */ + __u32 num_clusters_total; + + /** + * @num_cores_per_cluster: Number of logical cores per cluster + * (including inactive/nonexistent) + */ + __u32 num_cores_per_cluster; + + /** @max_frequency_khz: Maximum GPU core clock frequency */ + __u32 max_frequency_khz; + + /** @core_masks: Bitmask of present/enabled cores per cluster */ + __u64 core_masks[DRM_ASAHI_MAX_CLUSTERS]; + + /** + * @vm_start: VM range start VMA. Together with @vm_end, this defines + * the window of valid GPU VAs. Userspace is expected to subdivide VAs + * out of this window. + * + * This window contains all virtual addresses that userspace needs to + * know about. There may be kernel-internal GPU VAs outside this range, + * but that detail is not relevant here. + */ + __u64 vm_start; + + /** @vm_end: VM range end VMA */ + __u64 vm_end; + + /** + * @vm_kernel_min_size: Minimum kernel VMA window size. + * + * When creating a VM, userspace is required to carve out a section of + * virtual addresses (within the range given by @vm_start and + * @vm_end). The kernel will allocate various internal structures + * within the specified VA range. + * + * Allowing userspace to choose the VA range for the kernel, rather than + * the kernel reserving VAs and requiring userspace to cope, can assist + * in implementing SVM. + */ + __u64 vm_kernel_min_size; + + /** + * @max_commands_per_submission: Maximum number of supported commands + * per submission. This mirrors firmware limits. Userspace must split up + * larger command buffers, which may require inserting additional + * synchronization. + */ + __u32 max_commands_per_submission; + + /** + * @max_attachments: Maximum number of drm_asahi_attachment's per + * command + */ + __u32 max_attachments; + + /** + * @command_timestamp_frequency_hz: Timebase frequency for timestamps + * written during command execution, specified via drm_asahi_timestamp + * structures. As this rate is controlled by the firmware, it is a + * queryable parameter. + * + * Userspace must divide by this frequency to convert timestamps to + * seconds, rather than hardcoding a particular firmware's rate. + */ + __u64 command_timestamp_frequency_hz; +}; + +/** + * enum drm_asahi_feature - Feature bits + * + * This covers only features that userspace cannot infer from the architecture + * version. Most features don't need to be here. + */ +enum drm_asahi_feature { + /** + * @DRM_ASAHI_FEATURE_SOFT_FAULTS: GPU has "soft fault" enabled. Shader + * loads of unmapped memory will return zero. Shader stores to unmapped + * memory will be silently discarded. Note that only shader load/store + * is affected. Other hardware units are not affected, notably including + * texture sampling. + * + * Soft fault is set when initializing the GPU and cannot be runtime + * toggled. Therefore, it is exposed as a feature bit and not a + * userspace-settable flag on the VM. When soft fault is enabled, + * userspace can speculate memory accesses more aggressively. + */ + DRM_ASAHI_FEATURE_SOFT_FAULTS = (1UL) << 0, +}; + +/** + * struct drm_asahi_get_params - Arguments passed to DRM_IOCTL_ASAHI_GET_PARAMS + */ +struct drm_asahi_get_params { + /** @param_group: Parameter group to fetch (MBZ) */ + __u32 param_group; + + /** @pad: MBZ */ + __u32 pad; + + /** @pointer: User pointer to write parameter struct */ + __u64 pointer; + + /** + * @size: Size of the user buffer. In case of older userspace, this may + * be less than sizeof(struct drm_asahi_params_global). The kernel will + * not write past the length specified here, allowing extensibility. + */ + __u64 size; +}; + +/** + * struct drm_asahi_vm_create - Arguments passed to DRM_IOCTL_ASAHI_VM_CREATE + */ +struct drm_asahi_vm_create { + /** + * @kernel_start: Start of the kernel-reserved address range. See + * drm_asahi_params_global::vm_kernel_min_size. + * + * Both @kernel_start and @kernel_end must be within the range of + * valid VAs given by drm_asahi_params_global::vm_start and + * drm_asahi_params_global::vm_end. The size of the kernel range + * (@kernel_end - @kernel_start) must be at least + * drm_asahi_params_global::vm_kernel_min_size. + * + * Userspace must not bind any memory on this VM into this reserved + * range, it is for kernel use only. + */ + __u64 kernel_start; + + /** + * @kernel_end: End of the kernel-reserved address range. See + * @kernel_start. + */ + __u64 kernel_end; + + /** @vm_id: Returned VM ID */ + __u32 vm_id; + + /** @pad: MBZ */ + __u32 pad; +}; + +/** + * struct drm_asahi_vm_destroy - Arguments passed to DRM_IOCTL_ASAHI_VM_DESTROY + */ +struct drm_asahi_vm_destroy { + /** @vm_id: VM ID to be destroyed */ + __u32 vm_id; + + /** @pad: MBZ */ + __u32 pad; +}; + +/** + * enum drm_asahi_gem_flags - Flags for GEM creation + */ +enum drm_asahi_gem_flags { + /** + * @DRM_ASAHI_GEM_WRITEBACK: BO should be CPU-mapped as writeback. + * + * Map as writeback instead of write-combine. This optimizes for CPU + * reads. + */ + DRM_ASAHI_GEM_WRITEBACK = (1L << 0), + + /** + * @DRM_ASAHI_GEM_VM_PRIVATE: BO is private to this GPU VM (no exports). + */ + DRM_ASAHI_GEM_VM_PRIVATE = (1L << 1), +}; + +/** + * struct drm_asahi_gem_create - Arguments passed to DRM_IOCTL_ASAHI_GEM_CREATE + */ +struct drm_asahi_gem_create { + /** @size: Size of the BO */ + __u64 size; + + /** @flags: Combination of drm_asahi_gem_flags flags. */ + __u32 flags; + + /** + * @vm_id: VM ID to assign to the BO, if DRM_ASAHI_GEM_VM_PRIVATE is set + */ + __u32 vm_id; + + /** @handle: Returned GEM handle for the BO */ + __u32 handle; + + /** @pad: MBZ */ + __u32 pad; +}; + +/** + * struct drm_asahi_gem_mmap_offset - Arguments passed to + * DRM_IOCTL_ASAHI_GEM_MMAP_OFFSET + */ +struct drm_asahi_gem_mmap_offset { + /** @handle: Handle for the object being mapped. */ + __u32 handle; + + /** @flags: Must be zero */ + __u32 flags; + + /** @offset: The fake offset to use for subsequent mmap call */ + __u64 offset; +}; + +/** + * enum drm_asahi_bind_flags - Flags for GEM binding + */ +enum drm_asahi_bind_flags { + /** + * @DRM_ASAHI_BIND_UNBIND: Instead of binding a GEM object to the range, + * simply unbind the GPU VMA range. + */ + DRM_ASAHI_BIND_UNBIND = (1L << 0), + + /** @DRM_ASAHI_BIND_READ: Map BO with GPU read permission */ + DRM_ASAHI_BIND_READ = (1L << 1), + + /** @DRM_ASAHI_BIND_WRITE: Map BO with GPU write permission */ + DRM_ASAHI_BIND_WRITE = (1L << 2), + + /** + * @DRM_ASAHI_BIND_SINGLE_PAGE: Map a single page of the BO repeatedly + * across the VA range. + * + * This is useful to fill a VA range with scratch pages or zero pages. + * It is intended as a mechanism to accelerate sparse. + */ + DRM_ASAHI_BIND_SINGLE_PAGE = (1L << 3), +}; + +/** + * struct drm_asahi_gem_bind_op - Description of a single GEM bind operation. + */ +struct drm_asahi_gem_bind_op { + /** @flags: Combination of drm_asahi_bind_flags flags. */ + __u32 flags; + + /** @handle: GEM object to bind (except for UNBIND) */ + __u32 handle; + + /** + * @offset: Offset into the object (except for UNBIND). + * + * For a regular bind, this is the beginning of the region of the GEM + * object to bind. + * + * For a single-page bind, this is the offset to the single page that + * will be repeatedly bound. + * + * Must be page-size aligned. + */ + __u64 offset; + + /** + * @range: Number of bytes to bind/unbind to @addr. + * + * Must be page-size aligned. + */ + __u64 range; + + /** + * @addr: Address to bind to. + * + * Must be page-size aligned. + */ + __u64 addr; +}; + +/** + * struct drm_asahi_vm_bind - Arguments passed to + * DRM_IOCTL_ASAHI_VM_BIND + */ +struct drm_asahi_vm_bind { + /** @vm_id: The ID of the VM to bind to */ + __u32 vm_id; + + /** @num_binds: number of binds in this IOCTL. */ + __u32 num_binds; + + /** + * @stride: Stride in bytes between consecutive binds. This allows + * extensibility of drm_asahi_gem_bind_op. + */ + __u32 stride; + + /** @pad: MBZ */ + __u32 pad; + + /** + * @userptr: User pointer to an array of @num_binds structures of type + * @drm_asahi_gem_bind_op and size @stride bytes. + */ + __u64 userptr; +}; + +/** + * enum drm_asahi_bind_object_op - Special object bind operation + */ +enum drm_asahi_bind_object_op { + /** @DRM_ASAHI_BIND_OBJECT_OP_BIND: Bind a BO as a special GPU object */ + DRM_ASAHI_BIND_OBJECT_OP_BIND = 0, + + /** @DRM_ASAHI_BIND_OBJECT_OP_UNBIND: Unbind a special GPU object */ + DRM_ASAHI_BIND_OBJECT_OP_UNBIND = 1, +}; + +/** + * enum drm_asahi_bind_object_flags - Special object bind flags + */ +enum drm_asahi_bind_object_flags { + /** + * @DRM_ASAHI_BIND_OBJECT_USAGE_TIMESTAMPS: Map a BO as a timestamp + * buffer. + */ + DRM_ASAHI_BIND_OBJECT_USAGE_TIMESTAMPS = (1L << 0), +}; + +/** + * struct drm_asahi_gem_bind_object - Arguments passed to + * DRM_IOCTL_ASAHI_GEM_BIND_OBJECT + */ +struct drm_asahi_gem_bind_object { + /** @op: Bind operation (enum drm_asahi_bind_object_op) */ + __u32 op; + + /** @flags: Combination of drm_asahi_bind_object_flags flags. */ + __u32 flags; + + /** @handle: GEM object to bind/unbind (BIND) */ + __u32 handle; + + /** @vm_id: The ID of the VM to operate on (MBZ currently) */ + __u32 vm_id; + + /** @offset: Offset into the object (BIND only) */ + __u64 offset; + + /** @range: Number of bytes to bind/unbind (BIND only) */ + __u64 range; + + /** @object_handle: Object handle (out for BIND, in for UNBIND) */ + __u32 object_handle; + + /** @pad: MBZ */ + __u32 pad; +}; + +/** + * enum drm_asahi_cmd_type - Command type + */ +enum drm_asahi_cmd_type { + /** + * @DRM_ASAHI_CMD_RENDER: Render command, executing on the render + * subqueue. Combined vertex and fragment operation. + * + * Followed by a @drm_asahi_cmd_render payload. + */ + DRM_ASAHI_CMD_RENDER = 0, + + /** + * @DRM_ASAHI_CMD_COMPUTE: Compute command on the compute subqueue. + * + * Followed by a @drm_asahi_cmd_compute payload. + */ + DRM_ASAHI_CMD_COMPUTE = 1, + + /** + * @DRM_ASAHI_SET_VERTEX_ATTACHMENTS: Software command to set + * attachments for subsequent vertex shaders in the same submit. + * + * Followed by (possibly multiple) @drm_asahi_attachment payloads. + */ + DRM_ASAHI_SET_VERTEX_ATTACHMENTS = 2, + + /** + * @DRM_ASAHI_SET_FRAGMENT_ATTACHMENTS: Software command to set + * attachments for subsequent fragment shaders in the same submit. + * + * Followed by (possibly multiple) @drm_asahi_attachment payloads. + */ + DRM_ASAHI_SET_FRAGMENT_ATTACHMENTS = 3, + + /** + * @DRM_ASAHI_SET_COMPUTE_ATTACHMENTS: Software command to set + * attachments for subsequent compute shaders in the same submit. + * + * Followed by (possibly multiple) @drm_asahi_attachment payloads. + */ + DRM_ASAHI_SET_COMPUTE_ATTACHMENTS = 4, +}; + +/** + * enum drm_asahi_priority - Scheduling queue priority. + * + * These priorities are forwarded to the firmware to influence firmware + * scheduling. The exact policy is ultimately decided by firmware, but + * these enums allow userspace to communicate the intentions. + */ +enum drm_asahi_priority { + /** @DRM_ASAHI_PRIORITY_LOW: Low priority queue. */ + DRM_ASAHI_PRIORITY_LOW = 0, + + /** @DRM_ASAHI_PRIORITY_MEDIUM: Medium priority queue. */ + DRM_ASAHI_PRIORITY_MEDIUM = 1, + + /** + * @DRM_ASAHI_PRIORITY_HIGH: High priority queue. + * + * Reserved for future extension. + */ + DRM_ASAHI_PRIORITY_HIGH = 2, + + /** + * @DRM_ASAHI_PRIORITY_REALTIME: Real-time priority queue. + * + * Reserved for future extension. + */ + DRM_ASAHI_PRIORITY_REALTIME = 3, +}; + +/** + * struct drm_asahi_queue_create - Arguments passed to + * DRM_IOCTL_ASAHI_QUEUE_CREATE + */ +struct drm_asahi_queue_create { + /** @flags: MBZ */ + __u32 flags; + + /** @vm_id: The ID of the VM this queue is bound to */ + __u32 vm_id; + + /** @priority: One of drm_asahi_priority */ + __u32 priority; + + /** @queue_id: The returned queue ID */ + __u32 queue_id; + + /** + * @usc_exec_base: GPU base address for all USC binaries (shaders) on + * this queue. USC addresses are 32-bit relative to this 64-bit base. + * + * This sets the following registers on all queue commands: + * + * USC_EXEC_BASE_TA (vertex) + * USC_EXEC_BASE_ISP (fragment) + * USC_EXEC_BASE_CP (compute) + * + * While the hardware lets us configure these independently per command, + * we do not have a use case for this. Instead, we expect userspace to + * fix a 4GiB VA carveout for USC memory and pass its base address here. + */ + __u64 usc_exec_base; +}; + +/** + * struct drm_asahi_queue_destroy - Arguments passed to + * DRM_IOCTL_ASAHI_QUEUE_DESTROY + */ +struct drm_asahi_queue_destroy { + /** @queue_id: The queue ID to be destroyed */ + __u32 queue_id; + + /** @pad: MBZ */ + __u32 pad; +}; + +/** + * enum drm_asahi_sync_type - Sync item type + */ +enum drm_asahi_sync_type { + /** @DRM_ASAHI_SYNC_SYNCOBJ: Binary sync object */ + DRM_ASAHI_SYNC_SYNCOBJ = 0, + + /** @DRM_ASAHI_SYNC_TIMELINE_SYNCOBJ: Timeline sync object */ + DRM_ASAHI_SYNC_TIMELINE_SYNCOBJ = 1, +}; + +/** + * struct drm_asahi_sync - Sync item + */ +struct drm_asahi_sync { + /** @sync_type: One of drm_asahi_sync_type */ + __u32 sync_type; + + /** @handle: The sync object handle */ + __u32 handle; + + /** @timeline_value: Timeline value for timeline sync objects */ + __u64 timeline_value; +}; + +/** + * define DRM_ASAHI_BARRIER_NONE - Command index for no barrier + * + * This special value may be passed in to drm_asahi_command::vdm_barrier or + * drm_asahi_command::cdm_barrier to indicate that the respective subqueue + * should not wait on any previous work. + */ +#define DRM_ASAHI_BARRIER_NONE (0xFFFFu) + +/** + * struct drm_asahi_cmd_header - Top level command structure + * + * This struct is core to the command buffer definition and therefore is not + * extensible. + */ +struct drm_asahi_cmd_header { + /** @cmd_type: One of drm_asahi_cmd_type */ + __u16 cmd_type; + + /** + * @size: Size of this command, not including this header. + * + * For hardware commands, this enables extensibility of commands without + * requiring extra command types. Passing a command that is shorter + * than expected is explicitly allowed for backwards-compatibility. + * Truncated fields will be zeroed. + * + * For the synthetic attachment setting commands, this implicitly + * encodes the number of attachments. These commands take multiple + * fixed-size @drm_asahi_attachment structures as their payload, so size + * equals number of attachments * sizeof(struct drm_asahi_attachment). + */ + __u16 size; + + /** + * @vdm_barrier: VDM (render) command index to wait on. + * + * Barriers are indices relative to the beginning of a given submit. A + * barrier of 0 waits on commands submitted to the respective subqueue + * in previous submit ioctls. A barrier of N waits on N previous + * commands on the subqueue within the current submit ioctl. As a + * special case, passing @DRM_ASAHI_BARRIER_NONE avoids waiting on any + * commands in the subqueue. + * + * Examples: + * + * 0: This waits on all previous work. + * + * NONE: This does not wait for anything on this subqueue. + * + * 1: This waits on the first render command in the submit. + * This is valid only if there are multiple render commands in the + * same submit. + * + * Barriers are valid only for hardware commands. Synthetic software + * commands to set attachments must pass NONE here. + */ + __u16 vdm_barrier; + + /** + * @cdm_barrier: CDM (compute) command index to wait on. + * + * See @vdm_barrier, and replace VDM/render with CDM/compute. + */ + __u16 cdm_barrier; +}; + +/** + * struct drm_asahi_submit - Arguments passed to DRM_IOCTL_ASAHI_SUBMIT + */ +struct drm_asahi_submit { + /** + * @syncs: An optional pointer to an array of drm_asahi_sync. The first + * @in_sync_count elements are in-syncs, then the remaining + * @out_sync_count elements are out-syncs. Using a single array with + * explicit partitioning simplifies handling. + */ + __u64 syncs; + + /** + * @cmdbuf: Pointer to the command buffer to submit. + * + * This is a flat command buffer. By design, it contains no CPU + * pointers, which makes it suitable for a virtgpu wire protocol without + * requiring any serializing/deserializing step. + * + * It consists of a series of commands. Each command begins with a + * fixed-size @drm_asahi_cmd_header header and is followed by a + * variable-length payload according to the type and size in the header. + * + * The combined count of "real" hardware commands must be nonzero and at + * most drm_asahi_params_global::max_commands_per_submission. + */ + __u64 cmdbuf; + + /** @flags: Flags for command submission (MBZ) */ + __u32 flags; + + /** @queue_id: The queue ID to be submitted to */ + __u32 queue_id; + + /** + * @in_sync_count: Number of sync objects to wait on before starting + * this job. + */ + __u32 in_sync_count; + + /** + * @out_sync_count: Number of sync objects to signal upon completion of + * this job. + */ + __u32 out_sync_count; + + /** @cmdbuf_size: Command buffer size in bytes */ + __u32 cmdbuf_size; + + /** @pad: MBZ */ + __u32 pad; +}; + +/** + * struct drm_asahi_attachment - Describe an "attachment". + * + * Attachments are any memory written by shaders, notably including render + * target attachments written by the end-of-tile program. This is purely a hint + * about the accessed memory regions. It is optional to specify, which is + * fortunate as it cannot be specified precisely with bindless access anyway. + * But where possible, it's probably a good idea for userspace to include these + * hints, forwarded to the firmware. + * + * This struct is implicitly sized and therefore is not extensible. + */ +struct drm_asahi_attachment { + /** @pointer: Base address of the attachment */ + __u64 pointer; + + /** @size: Size of the attachment in bytes */ + __u64 size; + + /** @pad: MBZ */ + __u32 pad; + + /** @flags: MBZ */ + __u32 flags; +}; + +enum drm_asahi_render_flags { + /** + * @DRM_ASAHI_RENDER_VERTEX_SCRATCH: A vertex stage shader uses scratch + * memory. + */ + DRM_ASAHI_RENDER_VERTEX_SCRATCH = (1U << 0), + + /** + * @DRM_ASAHI_RENDER_PROCESS_EMPTY_TILES: Process even empty tiles. + * This must be set when clearing render targets. + */ + DRM_ASAHI_RENDER_PROCESS_EMPTY_TILES = (1U << 1), + + /** + * @DRM_ASAHI_RENDER_NO_VERTEX_CLUSTERING: Run vertex stage on a single + * cluster (on multi-cluster GPUs) + * + * This harms performance but can workaround certain sync/coherency + * bugs, and therefore is useful for debugging. + */ + DRM_ASAHI_RENDER_NO_VERTEX_CLUSTERING = (1U << 2), + + /** + * @DRM_ASAHI_RENDER_DBIAS_IS_INT: Use integer depth bias formula. + * + * Graphics specifications contain two alternate formulas for depth + * bias, a float formula used with floating-point depth buffers and an + * integer formula using with unorm depth buffers. This flag specifies + * that the integer formula should be used. If omitted, the float + * formula is used instead. + * + * This corresponds to bit 18 of the relevant hardware control register, + * so we match that here for efficiency. + */ + DRM_ASAHI_RENDER_DBIAS_IS_INT = (1U << 18), +}; + +/** + * struct drm_asahi_zls_buffer - Describe a depth or stencil buffer. + * + * These fields correspond to hardware registers in the ZLS (Z Load/Store) unit. + * There are three hardware registers for each field respectively for loads, + * stores, and partial renders. In practice, it makes sense to set all to the + * same values, except in exceptional cases not yet implemented in userspace, so + * we do not duplicate here for simplicity/efficiency. + * + * This struct is embedded in other structs and therefore is not extensible. + */ +struct drm_asahi_zls_buffer { + /** @base: Base address of the buffer */ + __u64 base; + + /** + * @comp_base: If the load buffer is compressed, address of the + * compression metadata section. + */ + __u64 comp_base; + + /** + * @stride: If layered rendering is enabled, the number of bytes + * between each layer of the buffer. + */ + __u32 stride; + + /** + * @comp_stride: If layered rendering is enabled, the number of bytes + * between each layer of the compression metadata. + */ + __u32 comp_stride; +}; + +/** + * struct drm_asahi_timestamp - Describe a timestamp write. + * + * The firmware can optionally write the GPU timestamp at render pass + * granularities, but it needs to be mapped specially via + * DRM_IOCTL_ASAHI_GEM_BIND_OBJECT. This structure therefore describes where to + * write as a handle-offset pair, rather than a GPU address like normal. + * + * This struct is embedded in other structs and therefore is not extensible. + */ +struct drm_asahi_timestamp { + /** + * @handle: Handle of the timestamp buffer, or 0 to skip this + * timestamp. If nonzero, this must equal the value returned in + * drm_asahi_gem_bind_object::object_handle. + */ + __u32 handle; + + /** @offset: Offset to write into the timestamp buffer */ + __u32 offset; +}; + +/** + * struct drm_asahi_timestamps - Describe timestamp writes. + * + * Each operation that can be timestamped, can be timestamped at the start and + * end. Therefore, drm_asahi_timestamp structs always come in pairs, bundled + * together into drm_asahi_timestamps. + * + * This struct is embedded in other structs and therefore is not extensible. + */ +struct drm_asahi_timestamps { + /** @start: Timestamp recorded at the start of the operation */ + struct drm_asahi_timestamp start; + + /** @end: Timestamp recorded at the end of the operation */ + struct drm_asahi_timestamp end; +}; + +/** + * struct drm_asahi_helper_program - Describe helper program configuration. + * + * The helper program is a compute-like kernel required for various hardware + * functionality. Its most important role is dynamically allocating + * scratch/stack memory for individual subgroups, by partitioning a static + * allocation shared for the whole device. It is supplied by userspace via + * drm_asahi_helper_program and internally dispatched by the hardware as needed. + * + * This struct is embedded in other structs and therefore is not extensible. + */ +struct drm_asahi_helper_program { + /** + * @binary: USC address to the helper program binary. This is a tagged + * pointer with configuration in the bottom bits. + */ + __u32 binary; + + /** @cfg: Additional configuration bits for the helper program. */ + __u32 cfg; + + /** + * @data: Data passed to the helper program. This value is not + * interpreted by the kernel, firmware, or hardware in any way. It is + * simply a sideband for userspace, set with the submit ioctl and read + * via special registers inside the helper program. + * + * In practice, userspace will pass a 64-bit GPU VA here pointing to the + * actual arguments, which presumably don't fit in 64-bits. + */ + __u64 data; +}; + +/** + * struct drm_asahi_bg_eot - Describe a background or end-of-tile program. + * + * The background and end-of-tile programs are dispatched by the hardware at the + * beginning and end of rendering. As the hardware "tilebuffer" is simply local + * memory, these programs are necessary to implement API-level render targets. + * The fragment-like background program is responsible for loading either the + * clear colour or the existing render target contents, while the compute-like + * end-of-tile program stores the tilebuffer contents to memory. + * + * This struct is embedded in other structs and therefore is not extensible. + */ +struct drm_asahi_bg_eot { + /** + * @usc: USC address of the hardware USC words binding resources + * (including images and uniforms) and the program itself. Note this is + * an additional layer of indirection compared to the helper program, + * avoiding the need for a sideband for data. This is a tagged pointer + * with additional configuration in the bottom bits. + */ + __u32 usc; + + /** + * @rsrc_spec: Resource specifier for the program. This is a packed + * hardware data structure describing the required number of registers, + * uniforms, bound textures, and bound samplers. + */ + __u32 rsrc_spec; +}; + +/** + * struct drm_asahi_cmd_render - Command to submit 3D + * + * This command submits a single render pass. The hardware control stream may + * include many draws and subpasses, but within the command, the framebuffer + * dimensions and attachments are fixed. + * + * The hardware requires the firmware to set a large number of Control Registers + * setting up state at render pass granularity before each command rendering 3D. + * The firmware bundles this state into data structures. Unfortunately, we + * cannot expose either any of that directly to userspace, because the + * kernel-firmware ABI is not stable. Although we can guarantee the firmware + * updates in tandem with the kernel, we cannot break old userspace when + * upgrading the firmware and kernel. Therefore, we need to abstract well the + * data structures to avoid tying our hands with future firmwares. + * + * The bulk of drm_asahi_cmd_render therefore consists of values of hardware + * control registers, marshalled via the firmware interface. + * + * The framebuffer/tilebuffer dimensions are also specified here. In addition to + * being passed to the firmware/hardware, the kernel requires these dimensions + * to calculate various essential tiling-related data structures. It is + * unfortunate that our submits are heavier than on vendors with saner + * hardware-software interfaces. The upshot is all of this information is + * readily available to userspace with all current APIs. + * + * It looks odd - but it's not overly burdensome and it ensures we can remain + * compatible with old userspace. + */ +struct drm_asahi_cmd_render { + /** @flags: Combination of drm_asahi_render_flags flags. */ + __u32 flags; + + /** + * @isp_zls_pixels: ISP_ZLS_PIXELS register value. This contains the + * depth/stencil width/height, which may differ from the framebuffer + * width/height. + */ + __u32 isp_zls_pixels; + + /** + * @vdm_ctrl_stream_base: VDM_CTRL_STREAM_BASE register value. GPU + * address to the beginning of the VDM control stream. + */ + __u64 vdm_ctrl_stream_base; + + /** @vertex_helper: Helper program used for the vertex shader */ + struct drm_asahi_helper_program vertex_helper; + + /** @fragment_helper: Helper program used for the fragment shader */ + struct drm_asahi_helper_program fragment_helper; + + /** + * @isp_scissor_base: ISP_SCISSOR_BASE register value. GPU address of an + * array of scissor descriptors indexed in the render pass. + */ + __u64 isp_scissor_base; + + /** + * @isp_dbias_base: ISP_DBIAS_BASE register value. GPU address of an + * array of depth bias values indexed in the render pass. + */ + __u64 isp_dbias_base; + + /** + * @isp_oclqry_base: ISP_OCLQRY_BASE register value. GPU address of an + * array of occlusion query results written by the render pass. + */ + __u64 isp_oclqry_base; + + /** @depth: Depth buffer */ + struct drm_asahi_zls_buffer depth; + + /** @stencil: Stencil buffer */ + struct drm_asahi_zls_buffer stencil; + + /** @zls_ctrl: ZLS_CTRL register value */ + __u64 zls_ctrl; + + /** @ppp_multisamplectl: PPP_MULTISAMPLECTL register value */ + __u64 ppp_multisamplectl; + + /** + * @sampler_heap: Base address of the sampler heap. This heap is used + * for both vertex shaders and fragment shaders. The registers are + * per-stage, but there is no known use case for separate heaps. + */ + __u64 sampler_heap; + + /** @ppp_ctrl: PPP_CTRL register value */ + __u32 ppp_ctrl; + + /** @width_px: Framebuffer width in pixels */ + __u16 width_px; + + /** @height_px: Framebuffer height in pixels */ + __u16 height_px; + + /** @layers: Number of layers in the framebuffer */ + __u16 layers; + + /** @sampler_count: Number of samplers in the sampler heap. */ + __u16 sampler_count; + + /** @utile_width_px: Width of a logical tilebuffer tile in pixels */ + __u8 utile_width_px; + + /** @utile_height_px: Height of a logical tilebuffer tile in pixels */ + __u8 utile_height_px; + + /** @samples: # of samples in the framebuffer. Must be 1, 2, or 4. */ + __u8 samples; + + /** @sample_size_B: # of bytes in the tilebuffer required per sample. */ + __u8 sample_size_B; + + /** + * @isp_merge_upper_x: 32-bit float used in the hardware triangle + * merging. Calculate as: tan(60 deg) * width. + * + * Making these values UAPI avoids requiring floating-point calculations + * in the kernel in the hot path. + */ + __u32 isp_merge_upper_x; + + /** + * @isp_merge_upper_y: 32-bit float. Calculate as: tan(60 deg) * height. + * See @isp_merge_upper_x. + */ + __u32 isp_merge_upper_y; + + /** @bg: Background program run for each tile at the start */ + struct drm_asahi_bg_eot bg; + + /** @eot: End-of-tile program ran for each tile at the end */ + struct drm_asahi_bg_eot eot; + + /** + * @partial_bg: Background program ran at the start of each tile when + * resuming the render pass during a partial render. + */ + struct drm_asahi_bg_eot partial_bg; + + /** + * @partial_eot: End-of-tile program ran at the end of each tile when + * pausing the render pass during a partial render. + */ + struct drm_asahi_bg_eot partial_eot; + + /** + * @isp_bgobjdepth: ISP_BGOBJDEPTH register value. This is the depth + * buffer clear value, encoded in the depth buffer's format: either a + * 32-bit float or a 16-bit unorm (with upper bits zeroed). + */ + __u32 isp_bgobjdepth; + + /** + * @isp_bgobjvals: ISP_BGOBJVALS register value. The bottom 8-bits + * contain the stencil buffer clear value. + */ + __u32 isp_bgobjvals; + + /** @ts_vtx: Timestamps for the vertex portion of the render */ + struct drm_asahi_timestamps ts_vtx; + + /** @ts_frag: Timestamps for the fragment portion of the render */ + struct drm_asahi_timestamps ts_frag; +}; + +/** + * struct drm_asahi_cmd_compute - Command to submit compute + * + * This command submits a control stream consisting of compute dispatches. There + * is essentially no limit on how many compute dispatches may be included in a + * single compute command, although timestamps are at command granularity. + */ +struct drm_asahi_cmd_compute { + /** @flags: MBZ */ + __u32 flags; + + /** @sampler_count: Number of samplers in the sampler heap. */ + __u32 sampler_count; + + /** + * @cdm_ctrl_stream_base: CDM_CTRL_STREAM_BASE register value. GPU + * address to the beginning of the CDM control stream. + */ + __u64 cdm_ctrl_stream_base; + + /** + * @cdm_ctrl_stream_end: GPU base address to the end of the hardware + * control stream. Note this only considers the first contiguous segment + * of the control stream, as the stream might jump elsewhere. + */ + __u64 cdm_ctrl_stream_end; + + /** @sampler_heap: Base address of the sampler heap. */ + __u64 sampler_heap; + + /** @helper: Helper program used for this compute command */ + struct drm_asahi_helper_program helper; + + /** @ts: Timestamps for the compute command */ + struct drm_asahi_timestamps ts; +}; + +/** + * struct drm_asahi_get_time - Arguments passed to DRM_IOCTL_ASAHI_GET_TIME + */ +struct drm_asahi_get_time { + /** @flags: MBZ. */ + __u64 flags; + + /** @gpu_timestamp: On return, the GPU timestamp in nanoseconds. */ + __u64 gpu_timestamp; +}; + +/** + * DRM_IOCTL_ASAHI() - Build an Asahi IOCTL number + * @__access: Access type. Must be R, W or RW. + * @__id: One of the DRM_ASAHI_xxx id. + * @__type: Suffix of the type being passed to the IOCTL. + * + * Don't use this macro directly, use the DRM_IOCTL_ASAHI_xxx + * values instead. + * + * Return: An IOCTL number to be passed to ioctl() from userspace. + */ +#define DRM_IOCTL_ASAHI(__access, __id, __type) \ + DRM_IO##__access(DRM_COMMAND_BASE + DRM_ASAHI_##__id, \ + struct drm_asahi_##__type) + +/* Note: this is an enum so that it can be resolved by Rust bindgen. */ +enum { + DRM_IOCTL_ASAHI_GET_PARAMS = DRM_IOCTL_ASAHI(W, GET_PARAMS, get_params), + DRM_IOCTL_ASAHI_GET_TIME = DRM_IOCTL_ASAHI(WR, GET_TIME, get_time), + DRM_IOCTL_ASAHI_VM_CREATE = DRM_IOCTL_ASAHI(WR, VM_CREATE, vm_create), + DRM_IOCTL_ASAHI_VM_DESTROY = DRM_IOCTL_ASAHI(W, VM_DESTROY, vm_destroy), + DRM_IOCTL_ASAHI_VM_BIND = DRM_IOCTL_ASAHI(W, VM_BIND, vm_bind), + DRM_IOCTL_ASAHI_GEM_CREATE = + DRM_IOCTL_ASAHI(WR, GEM_CREATE, gem_create), + DRM_IOCTL_ASAHI_GEM_MMAP_OFFSET = + DRM_IOCTL_ASAHI(WR, GEM_MMAP_OFFSET, gem_mmap_offset), + DRM_IOCTL_ASAHI_GEM_BIND_OBJECT = + DRM_IOCTL_ASAHI(WR, GEM_BIND_OBJECT, gem_bind_object), + DRM_IOCTL_ASAHI_QUEUE_CREATE = + DRM_IOCTL_ASAHI(WR, QUEUE_CREATE, queue_create), + DRM_IOCTL_ASAHI_QUEUE_DESTROY = + DRM_IOCTL_ASAHI(W, QUEUE_DESTROY, queue_destroy), + DRM_IOCTL_ASAHI_SUBMIT = DRM_IOCTL_ASAHI(W, SUBMIT, submit), +}; + +#if defined(__cplusplus) +} +#endif + +#endif /* _ASAHI_DRM_H_ */ diff --git a/include/arch/x86_64/drm/drm.h b/include/arch/x86_64/drm/drm.h new file mode 100644 index 00000000..ebd6ebb5 --- /dev/null +++ b/include/arch/x86_64/drm/drm.h @@ -0,0 +1,1449 @@ +/* + * Header for the Direct Rendering Manager + * + * Author: Rickard E. (Rik) Faith <faith@valinux.com> + * + * Acknowledgments: + * Dec 1999, Richard Henderson <rth@twiddle.net>, move to generic cmpxchg. + */ + +/* + * Copyright 1999 Precision Insight, Inc., Cedar Park, Texas. + * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California. + * All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef _DRM_H_ +#define _DRM_H_ + +#if defined(__linux__) + +#include <linux/types.h> +#include <asm/ioctl.h> +typedef unsigned int drm_handle_t; + +#else /* One of the BSDs */ + +#include <stdint.h> +#include <sys/ioccom.h> +#include <sys/types.h> +typedef int8_t __s8; +typedef uint8_t __u8; +typedef int16_t __s16; +typedef uint16_t __u16; +typedef int32_t __s32; +typedef uint32_t __u32; +typedef int64_t __s64; +typedef uint64_t __u64; +typedef size_t __kernel_size_t; +typedef unsigned long drm_handle_t; + +#endif + +#if defined(__cplusplus) +extern "C" { +#endif + +#define DRM_NAME "drm" /**< Name in kernel, /dev, and /proc */ +#define DRM_MIN_ORDER 5 /**< At least 2^5 bytes = 32 bytes */ +#define DRM_MAX_ORDER 22 /**< Up to 2^22 bytes = 4MB */ +#define DRM_RAM_PERCENT 10 /**< How much system ram can we lock? */ + +#define _DRM_LOCK_HELD 0x80000000U /**< Hardware lock is held */ +#define _DRM_LOCK_CONT 0x40000000U /**< Hardware lock is contended */ +#define _DRM_LOCK_IS_HELD(lock) ((lock) & _DRM_LOCK_HELD) +#define _DRM_LOCK_IS_CONT(lock) ((lock) & _DRM_LOCK_CONT) +#define _DRM_LOCKING_CONTEXT(lock) ((lock) & ~(_DRM_LOCK_HELD | _DRM_LOCK_CONT)) + +typedef unsigned int drm_context_t; +typedef unsigned int drm_drawable_t; +typedef unsigned int drm_magic_t; + +/* + * Cliprect. + * + * \warning: If you change this structure, make sure you change + * XF86DRIClipRectRec in the server as well + * + * \note KW: Actually it's illegal to change either for + * backwards-compatibility reasons. + */ +struct drm_clip_rect { + unsigned short x1; + unsigned short y1; + unsigned short x2; + unsigned short y2; +}; + +/* + * Drawable information. + */ +struct drm_drawable_info { + unsigned int num_rects; + struct drm_clip_rect *rects; +}; + +/* + * Texture region, + */ +struct drm_tex_region { + unsigned char next; + unsigned char prev; + unsigned char in_use; + unsigned char padding; + unsigned int age; +}; + +/* + * Hardware lock. + * + * The lock structure is a simple cache-line aligned integer. To avoid + * processor bus contention on a multiprocessor system, there should not be any + * other data stored in the same cache line. + */ +struct drm_hw_lock { + __volatile__ unsigned int lock; /**< lock variable */ + char padding[60]; /**< Pad to cache line */ +}; + +/* + * DRM_IOCTL_VERSION ioctl argument type. + * + * \sa drmGetVersion(). + */ +struct drm_version { + int version_major; /**< Major version */ + int version_minor; /**< Minor version */ + int version_patchlevel; /**< Patch level */ + __kernel_size_t name_len; /**< Length of name buffer */ + char *name; /**< Name of driver */ + __kernel_size_t date_len; /**< Length of date buffer */ + char *date; /**< User-space buffer to hold date */ + __kernel_size_t desc_len; /**< Length of desc buffer */ + char *desc; /**< User-space buffer to hold desc */ +}; + +/* + * DRM_IOCTL_GET_UNIQUE ioctl argument type. + * + * \sa drmGetBusid() and drmSetBusId(). + */ +struct drm_unique { + __kernel_size_t unique_len; /**< Length of unique */ + char *unique; /**< Unique name for driver instantiation */ +}; + +struct drm_list { + int count; /**< Length of user-space structures */ + struct drm_version *version; +}; + +struct drm_block { + int unused; +}; + +/* + * DRM_IOCTL_CONTROL ioctl argument type. + * + * \sa drmCtlInstHandler() and drmCtlUninstHandler(). + */ +struct drm_control { + enum { + DRM_ADD_COMMAND, + DRM_RM_COMMAND, + DRM_INST_HANDLER, + DRM_UNINST_HANDLER + } func; + int irq; +}; + +/* + * Type of memory to map. + */ +enum drm_map_type { + _DRM_FRAME_BUFFER = 0, /**< WC (no caching), no core dump */ + _DRM_REGISTERS = 1, /**< no caching, no core dump */ + _DRM_SHM = 2, /**< shared, cached */ + _DRM_AGP = 3, /**< AGP/GART */ + _DRM_SCATTER_GATHER = 4, /**< Scatter/gather memory for PCI DMA */ + _DRM_CONSISTENT = 5 /**< Consistent memory for PCI DMA */ +}; + +/* + * Memory mapping flags. + */ +enum drm_map_flags { + _DRM_RESTRICTED = 0x01, /**< Cannot be mapped to user-virtual */ + _DRM_READ_ONLY = 0x02, + _DRM_LOCKED = 0x04, /**< shared, cached, locked */ + _DRM_KERNEL = 0x08, /**< kernel requires access */ + _DRM_WRITE_COMBINING = 0x10, /**< use write-combining if available */ + _DRM_CONTAINS_LOCK = 0x20, /**< SHM page that contains lock */ + _DRM_REMOVABLE = 0x40, /**< Removable mapping */ + _DRM_DRIVER = 0x80 /**< Managed by driver */ +}; + +struct drm_ctx_priv_map { + unsigned int ctx_id; /**< Context requesting private mapping */ + void *handle; /**< Handle of map */ +}; + +/* + * DRM_IOCTL_GET_MAP, DRM_IOCTL_ADD_MAP and DRM_IOCTL_RM_MAP ioctls + * argument type. + * + * \sa drmAddMap(). + */ +struct drm_map { + unsigned long offset; /**< Requested physical address (0 for SAREA)*/ + unsigned long size; /**< Requested physical size (bytes) */ + enum drm_map_type type; /**< Type of memory to map */ + enum drm_map_flags flags; /**< Flags */ + void *handle; /**< User-space: "Handle" to pass to mmap() */ + /**< Kernel-space: kernel-virtual address */ + int mtrr; /**< MTRR slot used */ + /* Private data */ +}; + +/* + * DRM_IOCTL_GET_CLIENT ioctl argument type. + */ +struct drm_client { + int idx; /**< Which client desired? */ + int auth; /**< Is client authenticated? */ + unsigned long pid; /**< Process ID */ + unsigned long uid; /**< User ID */ + unsigned long magic; /**< Magic */ + unsigned long iocs; /**< Ioctl count */ +}; + +enum drm_stat_type { + _DRM_STAT_LOCK, + _DRM_STAT_OPENS, + _DRM_STAT_CLOSES, + _DRM_STAT_IOCTLS, + _DRM_STAT_LOCKS, + _DRM_STAT_UNLOCKS, + _DRM_STAT_VALUE, /**< Generic value */ + _DRM_STAT_BYTE, /**< Generic byte counter (1024bytes/K) */ + _DRM_STAT_COUNT, /**< Generic non-byte counter (1000/k) */ + + _DRM_STAT_IRQ, /**< IRQ */ + _DRM_STAT_PRIMARY, /**< Primary DMA bytes */ + _DRM_STAT_SECONDARY, /**< Secondary DMA bytes */ + _DRM_STAT_DMA, /**< DMA */ + _DRM_STAT_SPECIAL, /**< Special DMA (e.g., priority or polled) */ + _DRM_STAT_MISSED /**< Missed DMA opportunity */ + /* Add to the *END* of the list */ +}; + +/* + * DRM_IOCTL_GET_STATS ioctl argument type. + */ +struct drm_stats { + unsigned long count; + struct { + unsigned long value; + enum drm_stat_type type; + } data[15]; +}; + +/* + * Hardware locking flags. + */ +enum drm_lock_flags { + _DRM_LOCK_READY = 0x01, /**< Wait until hardware is ready for DMA */ + _DRM_LOCK_QUIESCENT = 0x02, /**< Wait until hardware quiescent */ + _DRM_LOCK_FLUSH = 0x04, /**< Flush this context's DMA queue first */ + _DRM_LOCK_FLUSH_ALL = 0x08, /**< Flush all DMA queues first */ + /* These *HALT* flags aren't supported yet + -- they will be used to support the + full-screen DGA-like mode. */ + _DRM_HALT_ALL_QUEUES = 0x10, /**< Halt all current and future queues */ + _DRM_HALT_CUR_QUEUES = 0x20 /**< Halt all current queues */ +}; + +/* + * DRM_IOCTL_LOCK, DRM_IOCTL_UNLOCK and DRM_IOCTL_FINISH ioctl argument type. + * + * \sa drmGetLock() and drmUnlock(). + */ +struct drm_lock { + int context; + enum drm_lock_flags flags; +}; + +/* + * DMA flags + * + * \warning + * These values \e must match xf86drm.h. + * + * \sa drm_dma. + */ +enum drm_dma_flags { + /* Flags for DMA buffer dispatch */ + _DRM_DMA_BLOCK = 0x01, /**< + * Block until buffer dispatched. + * + * \note The buffer may not yet have + * been processed by the hardware -- + * getting a hardware lock with the + * hardware quiescent will ensure + * that the buffer has been + * processed. + */ + _DRM_DMA_WHILE_LOCKED = 0x02, /**< Dispatch while lock held */ + _DRM_DMA_PRIORITY = 0x04, /**< High priority dispatch */ + + /* Flags for DMA buffer request */ + _DRM_DMA_WAIT = 0x10, /**< Wait for free buffers */ + _DRM_DMA_SMALLER_OK = 0x20, /**< Smaller-than-requested buffers OK */ + _DRM_DMA_LARGER_OK = 0x40 /**< Larger-than-requested buffers OK */ +}; + +/* + * DRM_IOCTL_ADD_BUFS and DRM_IOCTL_MARK_BUFS ioctl argument type. + * + * \sa drmAddBufs(). + */ +struct drm_buf_desc { + int count; /**< Number of buffers of this size */ + int size; /**< Size in bytes */ + int low_mark; /**< Low water mark */ + int high_mark; /**< High water mark */ + enum { + _DRM_PAGE_ALIGN = 0x01, /**< Align on page boundaries for DMA */ + _DRM_AGP_BUFFER = 0x02, /**< Buffer is in AGP space */ + _DRM_SG_BUFFER = 0x04, /**< Scatter/gather memory buffer */ + _DRM_FB_BUFFER = 0x08, /**< Buffer is in frame buffer */ + _DRM_PCI_BUFFER_RO = 0x10 /**< Map PCI DMA buffer read-only */ + } flags; + unsigned long agp_start; /**< + * Start address of where the AGP buffers are + * in the AGP aperture + */ +}; + +/* + * DRM_IOCTL_INFO_BUFS ioctl argument type. + */ +struct drm_buf_info { + int count; /**< Entries in list */ + struct drm_buf_desc *list; +}; + +/* + * DRM_IOCTL_FREE_BUFS ioctl argument type. + */ +struct drm_buf_free { + int count; + int *list; +}; + +/* + * Buffer information + * + * \sa drm_buf_map. + */ +struct drm_buf_pub { + int idx; /**< Index into the master buffer list */ + int total; /**< Buffer size */ + int used; /**< Amount of buffer in use (for DMA) */ + void *address; /**< Address of buffer */ +}; + +/* + * DRM_IOCTL_MAP_BUFS ioctl argument type. + */ +struct drm_buf_map { + int count; /**< Length of the buffer list */ +#ifdef __cplusplus + void *virt; +#else + void *virtual; /**< Mmap'd area in user-virtual */ +#endif + struct drm_buf_pub *list; /**< Buffer information */ +}; + +/* + * DRM_IOCTL_DMA ioctl argument type. + * + * Indices here refer to the offset into the buffer list in drm_buf_get. + * + * \sa drmDMA(). + */ +struct drm_dma { + int context; /**< Context handle */ + int send_count; /**< Number of buffers to send */ + int *send_indices; /**< List of handles to buffers */ + int *send_sizes; /**< Lengths of data to send */ + enum drm_dma_flags flags; /**< Flags */ + int request_count; /**< Number of buffers requested */ + int request_size; /**< Desired size for buffers */ + int *request_indices; /**< Buffer information */ + int *request_sizes; + int granted_count; /**< Number of buffers granted */ +}; + +enum drm_ctx_flags { + _DRM_CONTEXT_PRESERVED = 0x01, + _DRM_CONTEXT_2DONLY = 0x02 +}; + +/* + * DRM_IOCTL_ADD_CTX ioctl argument type. + * + * \sa drmCreateContext() and drmDestroyContext(). + */ +struct drm_ctx { + drm_context_t handle; + enum drm_ctx_flags flags; +}; + +/* + * DRM_IOCTL_RES_CTX ioctl argument type. + */ +struct drm_ctx_res { + int count; + struct drm_ctx *contexts; +}; + +/* + * DRM_IOCTL_ADD_DRAW and DRM_IOCTL_RM_DRAW ioctl argument type. + */ +struct drm_draw { + drm_drawable_t handle; +}; + +/* + * DRM_IOCTL_UPDATE_DRAW ioctl argument type. + */ +typedef enum { DRM_DRAWABLE_CLIPRECTS } drm_drawable_info_type_t; + +struct drm_update_draw { + drm_drawable_t handle; + unsigned int type; + unsigned int num; + unsigned long long data; +}; + +/* + * DRM_IOCTL_GET_MAGIC and DRM_IOCTL_AUTH_MAGIC ioctl argument type. + */ +struct drm_auth { + drm_magic_t magic; +}; + +/* + * DRM_IOCTL_IRQ_BUSID ioctl argument type. + * + * \sa drmGetInterruptFromBusID(). + */ +struct drm_irq_busid { + int irq; /**< IRQ number */ + int busnum; /**< bus number */ + int devnum; /**< device number */ + int funcnum; /**< function number */ +}; + +enum drm_vblank_seq_type { + _DRM_VBLANK_ABSOLUTE = 0x0, /**< Wait for specific vblank sequence + number */ + _DRM_VBLANK_RELATIVE = 0x1, /**< Wait for given number of vblanks */ + /* bits 1-6 are reserved for high crtcs */ + _DRM_VBLANK_HIGH_CRTC_MASK = 0x0000003e, + _DRM_VBLANK_EVENT = 0x4000000, /**< Send event instead of blocking */ + _DRM_VBLANK_FLIP = 0x8000000, /**< Scheduled buffer swap should flip */ + _DRM_VBLANK_NEXTONMISS = 0x10000000, /**< If missed, wait for next + vblank */ + _DRM_VBLANK_SECONDARY = 0x20000000, /**< Secondary display controller */ + _DRM_VBLANK_SIGNAL = 0x40000000 /**< Send signal instead of blocking, + unsupported */ +}; +#define _DRM_VBLANK_HIGH_CRTC_SHIFT 1 + +#define _DRM_VBLANK_TYPES_MASK (_DRM_VBLANK_ABSOLUTE | _DRM_VBLANK_RELATIVE) +#define _DRM_VBLANK_FLAGS_MASK \ + (_DRM_VBLANK_EVENT | _DRM_VBLANK_SIGNAL | _DRM_VBLANK_SECONDARY | \ + _DRM_VBLANK_NEXTONMISS) + +struct drm_wait_vblank_request { + enum drm_vblank_seq_type type; + unsigned int sequence; + unsigned long signal; +}; + +struct drm_wait_vblank_reply { + enum drm_vblank_seq_type type; + unsigned int sequence; + long tval_sec; + long tval_usec; +}; + +/* + * DRM_IOCTL_WAIT_VBLANK ioctl argument type. + * + * \sa drmWaitVBlank(). + */ +union drm_wait_vblank { + struct drm_wait_vblank_request request; + struct drm_wait_vblank_reply reply; +}; + +#define _DRM_PRE_MODESET 1 +#define _DRM_POST_MODESET 2 + +/* + * DRM_IOCTL_MODESET_CTL ioctl argument type + * + * \sa drmModesetCtl(). + */ +struct drm_modeset_ctl { + __u32 crtc; + __u32 cmd; +}; + +/* + * DRM_IOCTL_AGP_ENABLE ioctl argument type. + * + * \sa drmAgpEnable(). + */ +struct drm_agp_mode { + unsigned long mode; /**< AGP mode */ +}; + +/* + * DRM_IOCTL_AGP_ALLOC and DRM_IOCTL_AGP_FREE ioctls argument type. + * + * \sa drmAgpAlloc() and drmAgpFree(). + */ +struct drm_agp_buffer { + unsigned long size; /**< In bytes -- will round to page boundary */ + unsigned long handle; /**< Used for binding / unbinding */ + unsigned long type; /**< Type of memory to allocate */ + unsigned long physical; /**< Physical used by i810 */ +}; + +/* + * DRM_IOCTL_AGP_BIND and DRM_IOCTL_AGP_UNBIND ioctls argument type. + * + * \sa drmAgpBind() and drmAgpUnbind(). + */ +struct drm_agp_binding { + unsigned long handle; /**< From drm_agp_buffer */ + unsigned long offset; /**< In bytes -- will round to page boundary */ +}; + +/* + * DRM_IOCTL_AGP_INFO ioctl argument type. + * + * \sa drmAgpVersionMajor(), drmAgpVersionMinor(), drmAgpGetMode(), + * drmAgpBase(), drmAgpSize(), drmAgpMemoryUsed(), drmAgpMemoryAvail(), + * drmAgpVendorId() and drmAgpDeviceId(). + */ +struct drm_agp_info { + int agp_version_major; + int agp_version_minor; + unsigned long mode; + unsigned long aperture_base; /* physical address */ + unsigned long aperture_size; /* bytes */ + unsigned long memory_allowed; /* bytes */ + unsigned long memory_used; + + /* PCI information */ + unsigned short id_vendor; + unsigned short id_device; +}; + +/* + * DRM_IOCTL_SG_ALLOC ioctl argument type. + */ +struct drm_scatter_gather { + unsigned long size; /**< In bytes -- will round to page boundary */ + unsigned long handle; /**< Used for mapping / unmapping */ +}; + +/* + * DRM_IOCTL_SET_VERSION ioctl argument type. + */ +struct drm_set_version { + int drm_di_major; + int drm_di_minor; + int drm_dd_major; + int drm_dd_minor; +}; + +/* DRM_IOCTL_GEM_CLOSE ioctl argument type */ +struct drm_gem_close { + /** Handle of the object to be closed. */ + __u32 handle; + __u32 pad; +}; + +/* DRM_IOCTL_GEM_FLINK ioctl argument type */ +struct drm_gem_flink { + /** Handle for the object being named */ + __u32 handle; + + /** Returned global name */ + __u32 name; +}; + +/* DRM_IOCTL_GEM_OPEN ioctl argument type */ +struct drm_gem_open { + /** Name of object being opened */ + __u32 name; + + /** Returned handle for the object */ + __u32 handle; + + /** Returned size of the object */ + __u64 size; +}; + +/** + * DRM_CAP_DUMB_BUFFER + * + * If set to 1, the driver supports creating dumb buffers via the + * &DRM_IOCTL_MODE_CREATE_DUMB ioctl. + */ +#define DRM_CAP_DUMB_BUFFER 0x1 +/** + * DRM_CAP_VBLANK_HIGH_CRTC + * + * If set to 1, the kernel supports specifying a :ref:`CRTC index<crtc_index>` + * in the high bits of &drm_wait_vblank_request.type. + * + * Starting kernel version 2.6.39, this capability is always set to 1. + */ +#define DRM_CAP_VBLANK_HIGH_CRTC 0x2 +/** + * DRM_CAP_DUMB_PREFERRED_DEPTH + * + * The preferred bit depth for dumb buffers. + * + * The bit depth is the number of bits used to indicate the color of a single + * pixel excluding any padding. This is different from the number of bits per + * pixel. For instance, XRGB8888 has a bit depth of 24 but has 32 bits per + * pixel. + * + * Note that this preference only applies to dumb buffers, it's irrelevant for + * other types of buffers. + */ +#define DRM_CAP_DUMB_PREFERRED_DEPTH 0x3 +/** + * DRM_CAP_DUMB_PREFER_SHADOW + * + * If set to 1, the driver prefers userspace to render to a shadow buffer + * instead of directly rendering to a dumb buffer. For best speed, userspace + * should do streaming ordered memory copies into the dumb buffer and never + * read from it. + * + * Note that this preference only applies to dumb buffers, it's irrelevant for + * other types of buffers. + */ +#define DRM_CAP_DUMB_PREFER_SHADOW 0x4 +/** + * DRM_CAP_PRIME + * + * Bitfield of supported PRIME sharing capabilities. See &DRM_PRIME_CAP_IMPORT + * and &DRM_PRIME_CAP_EXPORT. + * + * Starting from kernel version 6.6, both &DRM_PRIME_CAP_IMPORT and + * &DRM_PRIME_CAP_EXPORT are always advertised. + * + * PRIME buffers are exposed as dma-buf file descriptors. + * See :ref:`prime_buffer_sharing`. + */ +#define DRM_CAP_PRIME 0x5 +/** + * DRM_PRIME_CAP_IMPORT + * + * If this bit is set in &DRM_CAP_PRIME, the driver supports importing PRIME + * buffers via the &DRM_IOCTL_PRIME_FD_TO_HANDLE ioctl. + * + * Starting from kernel version 6.6, this bit is always set in &DRM_CAP_PRIME. + */ +#define DRM_PRIME_CAP_IMPORT 0x1 +/** + * DRM_PRIME_CAP_EXPORT + * + * If this bit is set in &DRM_CAP_PRIME, the driver supports exporting PRIME + * buffers via the &DRM_IOCTL_PRIME_HANDLE_TO_FD ioctl. + * + * Starting from kernel version 6.6, this bit is always set in &DRM_CAP_PRIME. + */ +#define DRM_PRIME_CAP_EXPORT 0x2 +/** + * DRM_CAP_TIMESTAMP_MONOTONIC + * + * If set to 0, the kernel will report timestamps with ``CLOCK_REALTIME`` in + * struct drm_event_vblank. If set to 1, the kernel will report timestamps with + * ``CLOCK_MONOTONIC``. See ``clock_gettime(2)`` for the definition of these + * clocks. + * + * Starting from kernel version 2.6.39, the default value for this capability + * is 1. Starting kernel version 4.15, this capability is always set to 1. + */ +#define DRM_CAP_TIMESTAMP_MONOTONIC 0x6 +/** + * DRM_CAP_ASYNC_PAGE_FLIP + * + * If set to 1, the driver supports &DRM_MODE_PAGE_FLIP_ASYNC for legacy + * page-flips. + */ +#define DRM_CAP_ASYNC_PAGE_FLIP 0x7 +/** + * DRM_CAP_CURSOR_WIDTH + * + * The ``CURSOR_WIDTH`` and ``CURSOR_HEIGHT`` capabilities return a valid + * width x height combination for the hardware cursor. The intention is that a + * hardware agnostic userspace can query a cursor plane size to use. + * + * Note that the cross-driver contract is to merely return a valid size; + * drivers are free to attach another meaning on top, eg. i915 returns the + * maximum plane size. + */ +#define DRM_CAP_CURSOR_WIDTH 0x8 +/** + * DRM_CAP_CURSOR_HEIGHT + * + * See &DRM_CAP_CURSOR_WIDTH. + */ +#define DRM_CAP_CURSOR_HEIGHT 0x9 +/** + * DRM_CAP_ADDFB2_MODIFIERS + * + * If set to 1, the driver supports supplying modifiers in the + * &DRM_IOCTL_MODE_ADDFB2 ioctl. + */ +#define DRM_CAP_ADDFB2_MODIFIERS 0x10 +/** + * DRM_CAP_PAGE_FLIP_TARGET + * + * If set to 1, the driver supports the &DRM_MODE_PAGE_FLIP_TARGET_ABSOLUTE and + * &DRM_MODE_PAGE_FLIP_TARGET_RELATIVE flags in + * &drm_mode_crtc_page_flip_target.flags for the &DRM_IOCTL_MODE_PAGE_FLIP + * ioctl. + */ +#define DRM_CAP_PAGE_FLIP_TARGET 0x11 +/** + * DRM_CAP_CRTC_IN_VBLANK_EVENT + * + * If set to 1, the kernel supports reporting the CRTC ID in + * &drm_event_vblank.crtc_id for the &DRM_EVENT_VBLANK and + * &DRM_EVENT_FLIP_COMPLETE events. + * + * Starting kernel version 4.12, this capability is always set to 1. + */ +#define DRM_CAP_CRTC_IN_VBLANK_EVENT 0x12 +/** + * DRM_CAP_SYNCOBJ + * + * If set to 1, the driver supports sync objects. See :ref:`drm_sync_objects`. + */ +#define DRM_CAP_SYNCOBJ 0x13 +/** + * DRM_CAP_SYNCOBJ_TIMELINE + * + * If set to 1, the driver supports timeline operations on sync objects. See + * :ref:`drm_sync_objects`. + */ +#define DRM_CAP_SYNCOBJ_TIMELINE 0x14 +/** + * DRM_CAP_ATOMIC_ASYNC_PAGE_FLIP + * + * If set to 1, the driver supports &DRM_MODE_PAGE_FLIP_ASYNC for atomic + * commits. + */ +#define DRM_CAP_ATOMIC_ASYNC_PAGE_FLIP 0x15 + +/* DRM_IOCTL_GET_CAP ioctl argument type */ +struct drm_get_cap { + __u64 capability; + __u64 value; +}; + +/** + * DRM_CLIENT_CAP_STEREO_3D + * + * If set to 1, the DRM core will expose the stereo 3D capabilities of the + * monitor by advertising the supported 3D layouts in the flags of struct + * drm_mode_modeinfo. See ``DRM_MODE_FLAG_3D_*``. + * + * This capability is always supported for all drivers starting from kernel + * version 3.13. + */ +#define DRM_CLIENT_CAP_STEREO_3D 1 + +/** + * DRM_CLIENT_CAP_UNIVERSAL_PLANES + * + * If set to 1, the DRM core will expose all planes (overlay, primary, and + * cursor) to userspace. + * + * This capability has been introduced in kernel version 3.15. Starting from + * kernel version 3.17, this capability is always supported for all drivers. + */ +#define DRM_CLIENT_CAP_UNIVERSAL_PLANES 2 + +/** + * DRM_CLIENT_CAP_ATOMIC + * + * If set to 1, the DRM core will expose atomic properties to userspace. This + * implicitly enables &DRM_CLIENT_CAP_UNIVERSAL_PLANES and + * &DRM_CLIENT_CAP_ASPECT_RATIO. + * + * If the driver doesn't support atomic mode-setting, enabling this capability + * will fail with -EOPNOTSUPP. + * + * This capability has been introduced in kernel version 4.0. Starting from + * kernel version 4.2, this capability is always supported for atomic-capable + * drivers. + */ +#define DRM_CLIENT_CAP_ATOMIC 3 + +/** + * DRM_CLIENT_CAP_ASPECT_RATIO + * + * If set to 1, the DRM core will provide aspect ratio information in modes. + * See ``DRM_MODE_FLAG_PIC_AR_*``. + * + * This capability is always supported for all drivers starting from kernel + * version 4.18. + */ +#define DRM_CLIENT_CAP_ASPECT_RATIO 4 + +/** + * DRM_CLIENT_CAP_WRITEBACK_CONNECTORS + * + * If set to 1, the DRM core will expose special connectors to be used for + * writing back to memory the scene setup in the commit. The client must enable + * &DRM_CLIENT_CAP_ATOMIC first. + * + * This capability is always supported for atomic-capable drivers starting from + * kernel version 4.19. + */ +#define DRM_CLIENT_CAP_WRITEBACK_CONNECTORS 5 + +/** + * DRM_CLIENT_CAP_CURSOR_PLANE_HOTSPOT + * + * Drivers for para-virtualized hardware (e.g. vmwgfx, qxl, virtio and + * virtualbox) have additional restrictions for cursor planes (thus + * making cursor planes on those drivers not truly universal,) e.g. + * they need cursor planes to act like one would expect from a mouse + * cursor and have correctly set hotspot properties. + * If this client cap is not set the DRM core will hide cursor plane on + * those virtualized drivers because not setting it implies that the + * client is not capable of dealing with those extra restictions. + * Clients which do set cursor hotspot and treat the cursor plane + * like a mouse cursor should set this property. + * The client must enable &DRM_CLIENT_CAP_ATOMIC first. + * + * Setting this property on drivers which do not special case + * cursor planes (i.e. non-virtualized drivers) will return + * EOPNOTSUPP, which can be used by userspace to gauge + * requirements of the hardware/drivers they're running on. + * + * This capability is always supported for atomic-capable virtualized + * drivers starting from kernel version 6.6. + */ +#define DRM_CLIENT_CAP_CURSOR_PLANE_HOTSPOT 6 + +/* DRM_IOCTL_SET_CLIENT_CAP ioctl argument type */ +struct drm_set_client_cap { + __u64 capability; + __u64 value; +}; + +#define DRM_RDWR O_RDWR +#define DRM_CLOEXEC O_CLOEXEC +struct drm_prime_handle { + __u32 handle; + + /** Flags.. only applicable for handle->fd */ + __u32 flags; + + /** Returned dmabuf file descriptor */ + __s32 fd; +}; + +struct drm_syncobj_create { + __u32 handle; +#define DRM_SYNCOBJ_CREATE_SIGNALED (1 << 0) + __u32 flags; +}; + +struct drm_syncobj_destroy { + __u32 handle; + __u32 pad; +}; + +#define DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_IMPORT_SYNC_FILE (1 << 0) +#define DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_TIMELINE (1 << 1) +#define DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_EXPORT_SYNC_FILE (1 << 0) +#define DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_TIMELINE (1 << 1) +struct drm_syncobj_handle { + __u32 handle; + __u32 flags; + + __s32 fd; + __u32 pad; + + __u64 point; +}; + +struct drm_syncobj_transfer { + __u32 src_handle; + __u32 dst_handle; + __u64 src_point; + __u64 dst_point; + __u32 flags; + __u32 pad; +}; + +#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0) +#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT (1 << 1) +#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE \ + (1 << 2) /* wait for time point to become available */ +#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_DEADLINE \ + (1 << 3) /* set fence deadline to deadline_nsec */ +struct drm_syncobj_wait { + __u64 handles; + /* absolute timeout */ + __s64 timeout_nsec; + __u32 count_handles; + __u32 flags; + __u32 first_signaled; /* only valid when not waiting all */ + __u32 pad; + /** + * @deadline_nsec - fence deadline hint + * + * Deadline hint, in absolute CLOCK_MONOTONIC, to set on backing + * fence(s) if the DRM_SYNCOBJ_WAIT_FLAGS_WAIT_DEADLINE flag is + * set. + */ + __u64 deadline_nsec; +}; + +struct drm_syncobj_timeline_wait { + __u64 handles; + /* wait on specific timeline point for every handles*/ + __u64 points; + /* absolute timeout */ + __s64 timeout_nsec; + __u32 count_handles; + __u32 flags; + __u32 first_signaled; /* only valid when not waiting all */ + __u32 pad; + /** + * @deadline_nsec - fence deadline hint + * + * Deadline hint, in absolute CLOCK_MONOTONIC, to set on backing + * fence(s) if the DRM_SYNCOBJ_WAIT_FLAGS_WAIT_DEADLINE flag is + * set. + */ + __u64 deadline_nsec; +}; + +/** + * struct drm_syncobj_eventfd + * @handle: syncobj handle. + * @flags: Zero to wait for the point to be signalled, or + * &DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE to wait for a fence to be + * available for the point. + * @point: syncobj timeline point (set to zero for binary syncobjs). + * @fd: Existing eventfd to sent events to. + * @pad: Must be zero. + * + * Register an eventfd to be signalled by a syncobj. The eventfd counter will + * be incremented by one. + */ +struct drm_syncobj_eventfd { + __u32 handle; + __u32 flags; + __u64 point; + __s32 fd; + __u32 pad; +}; + +struct drm_syncobj_array { + __u64 handles; + __u32 count_handles; + __u32 pad; +}; + +#define DRM_SYNCOBJ_QUERY_FLAGS_LAST_SUBMITTED \ + (1 << 0) /* last available point on timeline syncobj */ +struct drm_syncobj_timeline_array { + __u64 handles; + __u64 points; + __u32 count_handles; + __u32 flags; +}; + +/* Query current scanout sequence number */ +struct drm_crtc_get_sequence { + __u32 crtc_id; /* requested crtc_id */ + __u32 active; /* return: crtc output is active */ + __u64 sequence; /* return: most recent vblank sequence */ + __s64 sequence_ns; /* return: most recent time of first pixel out */ +}; + +/* Queue event to be delivered at specified sequence. Time stamp marks + * when the first pixel of the refresh cycle leaves the display engine + * for the display + */ +#define DRM_CRTC_SEQUENCE_RELATIVE \ + 0x00000001 /* sequence is relative to current */ +#define DRM_CRTC_SEQUENCE_NEXT_ON_MISS \ + 0x00000002 /* Use next sequence if we've missed */ + +struct drm_crtc_queue_sequence { + __u32 crtc_id; + __u32 flags; + __u64 sequence; /* on input, target sequence. on output, actual sequence + */ + __u64 user_data; /* user data passed to event */ +}; + +#define DRM_CLIENT_NAME_MAX_LEN 64 +struct drm_set_client_name { + __u64 name_len; + __u64 name; +}; + +#if defined(__cplusplus) +} +#endif + +#include "drm_mode.h" + +#if defined(__cplusplus) +extern "C" { +#endif + +#define DRM_IOCTL_BASE 'd' +#define DRM_IO(nr) _IO(DRM_IOCTL_BASE, nr) +#define DRM_IOR(nr, type) _IOR(DRM_IOCTL_BASE, nr, type) +#define DRM_IOW(nr, type) _IOW(DRM_IOCTL_BASE, nr, type) +#define DRM_IOWR(nr, type) _IOWR(DRM_IOCTL_BASE, nr, type) + +#define DRM_IOCTL_VERSION DRM_IOWR(0x00, struct drm_version) +#define DRM_IOCTL_GET_UNIQUE DRM_IOWR(0x01, struct drm_unique) +#define DRM_IOCTL_GET_MAGIC DRM_IOR(0x02, struct drm_auth) +#define DRM_IOCTL_IRQ_BUSID DRM_IOWR(0x03, struct drm_irq_busid) +#define DRM_IOCTL_GET_MAP DRM_IOWR(0x04, struct drm_map) +#define DRM_IOCTL_GET_CLIENT DRM_IOWR(0x05, struct drm_client) +#define DRM_IOCTL_GET_STATS DRM_IOR(0x06, struct drm_stats) +#define DRM_IOCTL_SET_VERSION DRM_IOWR(0x07, struct drm_set_version) +#define DRM_IOCTL_MODESET_CTL DRM_IOW(0x08, struct drm_modeset_ctl) +/** + * DRM_IOCTL_GEM_CLOSE - Close a GEM handle. + * + * GEM handles are not reference-counted by the kernel. User-space is + * responsible for managing their lifetime. For example, if user-space imports + * the same memory object twice on the same DRM file description, the same GEM + * handle is returned by both imports, and user-space needs to ensure + * &DRM_IOCTL_GEM_CLOSE is performed once only. The same situation can happen + * when a memory object is allocated, then exported and imported again on the + * same DRM file description. The &DRM_IOCTL_MODE_GETFB2 IOCTL is an exception + * and always returns fresh new GEM handles even if an existing GEM handle + * already refers to the same memory object before the IOCTL is performed. + */ +#define DRM_IOCTL_GEM_CLOSE DRM_IOW(0x09, struct drm_gem_close) +#define DRM_IOCTL_GEM_FLINK DRM_IOWR(0x0a, struct drm_gem_flink) +#define DRM_IOCTL_GEM_OPEN DRM_IOWR(0x0b, struct drm_gem_open) +#define DRM_IOCTL_GET_CAP DRM_IOWR(0x0c, struct drm_get_cap) +#define DRM_IOCTL_SET_CLIENT_CAP DRM_IOW(0x0d, struct drm_set_client_cap) + +#define DRM_IOCTL_SET_UNIQUE DRM_IOW(0x10, struct drm_unique) +#define DRM_IOCTL_AUTH_MAGIC DRM_IOW(0x11, struct drm_auth) +#define DRM_IOCTL_BLOCK DRM_IOWR(0x12, struct drm_block) +#define DRM_IOCTL_UNBLOCK DRM_IOWR(0x13, struct drm_block) +#define DRM_IOCTL_CONTROL DRM_IOW(0x14, struct drm_control) +#define DRM_IOCTL_ADD_MAP DRM_IOWR(0x15, struct drm_map) +#define DRM_IOCTL_ADD_BUFS DRM_IOWR(0x16, struct drm_buf_desc) +#define DRM_IOCTL_MARK_BUFS DRM_IOW(0x17, struct drm_buf_desc) +#define DRM_IOCTL_INFO_BUFS DRM_IOWR(0x18, struct drm_buf_info) +#define DRM_IOCTL_MAP_BUFS DRM_IOWR(0x19, struct drm_buf_map) +#define DRM_IOCTL_FREE_BUFS DRM_IOW(0x1a, struct drm_buf_free) + +#define DRM_IOCTL_RM_MAP DRM_IOW(0x1b, struct drm_map) + +#define DRM_IOCTL_SET_SAREA_CTX DRM_IOW(0x1c, struct drm_ctx_priv_map) +#define DRM_IOCTL_GET_SAREA_CTX DRM_IOWR(0x1d, struct drm_ctx_priv_map) + +#define DRM_IOCTL_SET_MASTER DRM_IO(0x1e) +#define DRM_IOCTL_DROP_MASTER DRM_IO(0x1f) + +#define DRM_IOCTL_ADD_CTX DRM_IOWR(0x20, struct drm_ctx) +#define DRM_IOCTL_RM_CTX DRM_IOWR(0x21, struct drm_ctx) +#define DRM_IOCTL_MOD_CTX DRM_IOW(0x22, struct drm_ctx) +#define DRM_IOCTL_GET_CTX DRM_IOWR(0x23, struct drm_ctx) +#define DRM_IOCTL_SWITCH_CTX DRM_IOW(0x24, struct drm_ctx) +#define DRM_IOCTL_NEW_CTX DRM_IOW(0x25, struct drm_ctx) +#define DRM_IOCTL_RES_CTX DRM_IOWR(0x26, struct drm_ctx_res) +#define DRM_IOCTL_ADD_DRAW DRM_IOWR(0x27, struct drm_draw) +#define DRM_IOCTL_RM_DRAW DRM_IOWR(0x28, struct drm_draw) +#define DRM_IOCTL_DMA DRM_IOWR(0x29, struct drm_dma) +#define DRM_IOCTL_LOCK DRM_IOW(0x2a, struct drm_lock) +#define DRM_IOCTL_UNLOCK DRM_IOW(0x2b, struct drm_lock) +#define DRM_IOCTL_FINISH DRM_IOW(0x2c, struct drm_lock) + +/** + * DRM_IOCTL_PRIME_HANDLE_TO_FD - Convert a GEM handle to a DMA-BUF FD. + * + * User-space sets &drm_prime_handle.handle with the GEM handle to export and + * &drm_prime_handle.flags, and gets back a DMA-BUF file descriptor in + * &drm_prime_handle.fd. + * + * The export can fail for any driver-specific reason, e.g. because export is + * not supported for this specific GEM handle (but might be for others). + * + * Support for exporting DMA-BUFs is advertised via &DRM_PRIME_CAP_EXPORT. + */ +#define DRM_IOCTL_PRIME_HANDLE_TO_FD DRM_IOWR(0x2d, struct drm_prime_handle) +/** + * DRM_IOCTL_PRIME_FD_TO_HANDLE - Convert a DMA-BUF FD to a GEM handle. + * + * User-space sets &drm_prime_handle.fd with a DMA-BUF file descriptor to + * import, and gets back a GEM handle in &drm_prime_handle.handle. + * &drm_prime_handle.flags is unused. + * + * If an existing GEM handle refers to the memory object backing the DMA-BUF, + * that GEM handle is returned. Therefore user-space which needs to handle + * arbitrary DMA-BUFs must have a user-space lookup data structure to manually + * reference-count duplicated GEM handles. For more information see + * &DRM_IOCTL_GEM_CLOSE. + * + * The import can fail for any driver-specific reason, e.g. because import is + * only supported for DMA-BUFs allocated on this DRM device. + * + * Support for importing DMA-BUFs is advertised via &DRM_PRIME_CAP_IMPORT. + */ +#define DRM_IOCTL_PRIME_FD_TO_HANDLE DRM_IOWR(0x2e, struct drm_prime_handle) + +#define DRM_IOCTL_AGP_ACQUIRE DRM_IO(0x30) +#define DRM_IOCTL_AGP_RELEASE DRM_IO(0x31) +#define DRM_IOCTL_AGP_ENABLE DRM_IOW(0x32, struct drm_agp_mode) +#define DRM_IOCTL_AGP_INFO DRM_IOR(0x33, struct drm_agp_info) +#define DRM_IOCTL_AGP_ALLOC DRM_IOWR(0x34, struct drm_agp_buffer) +#define DRM_IOCTL_AGP_FREE DRM_IOW(0x35, struct drm_agp_buffer) +#define DRM_IOCTL_AGP_BIND DRM_IOW(0x36, struct drm_agp_binding) +#define DRM_IOCTL_AGP_UNBIND DRM_IOW(0x37, struct drm_agp_binding) + +#define DRM_IOCTL_SG_ALLOC DRM_IOWR(0x38, struct drm_scatter_gather) +#define DRM_IOCTL_SG_FREE DRM_IOW(0x39, struct drm_scatter_gather) + +#define DRM_IOCTL_WAIT_VBLANK DRM_IOWR(0x3a, union drm_wait_vblank) + +#define DRM_IOCTL_CRTC_GET_SEQUENCE DRM_IOWR(0x3b, struct drm_crtc_get_sequence) +#define DRM_IOCTL_CRTC_QUEUE_SEQUENCE \ + DRM_IOWR(0x3c, struct drm_crtc_queue_sequence) + +#define DRM_IOCTL_UPDATE_DRAW DRM_IOW(0x3f, struct drm_update_draw) + +#define DRM_IOCTL_MODE_GETRESOURCES DRM_IOWR(0xA0, struct drm_mode_card_res) +#define DRM_IOCTL_MODE_GETCRTC DRM_IOWR(0xA1, struct drm_mode_crtc) +#define DRM_IOCTL_MODE_SETCRTC DRM_IOWR(0xA2, struct drm_mode_crtc) +#define DRM_IOCTL_MODE_CURSOR DRM_IOWR(0xA3, struct drm_mode_cursor) +#define DRM_IOCTL_MODE_GETGAMMA DRM_IOWR(0xA4, struct drm_mode_crtc_lut) +#define DRM_IOCTL_MODE_SETGAMMA DRM_IOWR(0xA5, struct drm_mode_crtc_lut) +#define DRM_IOCTL_MODE_GETENCODER DRM_IOWR(0xA6, struct drm_mode_get_encoder) +#define DRM_IOCTL_MODE_GETCONNECTOR \ + DRM_IOWR(0xA7, struct drm_mode_get_connector) +#define DRM_IOCTL_MODE_ATTACHMODE \ + DRM_IOWR(0xA8, struct drm_mode_mode_cmd) /* deprecated (never worked) \ + */ +#define DRM_IOCTL_MODE_DETACHMODE \ + DRM_IOWR(0xA9, struct drm_mode_mode_cmd) /* deprecated (never worked) \ + */ + +#define DRM_IOCTL_MODE_GETPROPERTY DRM_IOWR(0xAA, struct drm_mode_get_property) +#define DRM_IOCTL_MODE_SETPROPERTY \ + DRM_IOWR(0xAB, struct drm_mode_connector_set_property) +#define DRM_IOCTL_MODE_GETPROPBLOB DRM_IOWR(0xAC, struct drm_mode_get_blob) +#define DRM_IOCTL_MODE_GETFB DRM_IOWR(0xAD, struct drm_mode_fb_cmd) +#define DRM_IOCTL_MODE_ADDFB DRM_IOWR(0xAE, struct drm_mode_fb_cmd) +/** + * DRM_IOCTL_MODE_RMFB - Remove a framebuffer. + * + * This removes a framebuffer previously added via ADDFB/ADDFB2. The IOCTL + * argument is a framebuffer object ID. + * + * Warning: removing a framebuffer currently in-use on an enabled plane will + * disable that plane. The CRTC the plane is linked to may also be disabled + * (depending on driver capabilities). + */ +#define DRM_IOCTL_MODE_RMFB DRM_IOWR(0xAF, unsigned int) +#define DRM_IOCTL_MODE_PAGE_FLIP DRM_IOWR(0xB0, struct drm_mode_crtc_page_flip) +#define DRM_IOCTL_MODE_DIRTYFB DRM_IOWR(0xB1, struct drm_mode_fb_dirty_cmd) + +/** + * DRM_IOCTL_MODE_CREATE_DUMB - Create a new dumb buffer object. + * + * KMS dumb buffers provide a very primitive way to allocate a buffer object + * suitable for scanout and map it for software rendering. KMS dumb buffers are + * not suitable for hardware-accelerated rendering nor video decoding. KMS dumb + * buffers are not suitable to be displayed on any other device than the KMS + * device where they were allocated from. Also see + * :ref:`kms_dumb_buffer_objects`. + * + * The IOCTL argument is a struct drm_mode_create_dumb. + * + * User-space is expected to create a KMS dumb buffer via this IOCTL, then add + * it as a KMS framebuffer via &DRM_IOCTL_MODE_ADDFB and map it via + * &DRM_IOCTL_MODE_MAP_DUMB. + * + * &DRM_CAP_DUMB_BUFFER indicates whether this IOCTL is supported. + * &DRM_CAP_DUMB_PREFERRED_DEPTH and &DRM_CAP_DUMB_PREFER_SHADOW indicate + * driver preferences for dumb buffers. + */ +#define DRM_IOCTL_MODE_CREATE_DUMB DRM_IOWR(0xB2, struct drm_mode_create_dumb) +#define DRM_IOCTL_MODE_MAP_DUMB DRM_IOWR(0xB3, struct drm_mode_map_dumb) +#define DRM_IOCTL_MODE_DESTROY_DUMB DRM_IOWR(0xB4, struct drm_mode_destroy_dumb) +#define DRM_IOCTL_MODE_GETPLANERESOURCES \ + DRM_IOWR(0xB5, struct drm_mode_get_plane_res) +#define DRM_IOCTL_MODE_GETPLANE DRM_IOWR(0xB6, struct drm_mode_get_plane) +#define DRM_IOCTL_MODE_SETPLANE DRM_IOWR(0xB7, struct drm_mode_set_plane) +#define DRM_IOCTL_MODE_ADDFB2 DRM_IOWR(0xB8, struct drm_mode_fb_cmd2) +#define DRM_IOCTL_MODE_OBJ_GETPROPERTIES \ + DRM_IOWR(0xB9, struct drm_mode_obj_get_properties) +#define DRM_IOCTL_MODE_OBJ_SETPROPERTY \ + DRM_IOWR(0xBA, struct drm_mode_obj_set_property) +#define DRM_IOCTL_MODE_CURSOR2 DRM_IOWR(0xBB, struct drm_mode_cursor2) +#define DRM_IOCTL_MODE_ATOMIC DRM_IOWR(0xBC, struct drm_mode_atomic) +#define DRM_IOCTL_MODE_CREATEPROPBLOB \ + DRM_IOWR(0xBD, struct drm_mode_create_blob) +#define DRM_IOCTL_MODE_DESTROYPROPBLOB \ + DRM_IOWR(0xBE, struct drm_mode_destroy_blob) + +#define DRM_IOCTL_SYNCOBJ_CREATE DRM_IOWR(0xBF, struct drm_syncobj_create) +#define DRM_IOCTL_SYNCOBJ_DESTROY DRM_IOWR(0xC0, struct drm_syncobj_destroy) +#define DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD DRM_IOWR(0xC1, struct drm_syncobj_handle) +#define DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE DRM_IOWR(0xC2, struct drm_syncobj_handle) +#define DRM_IOCTL_SYNCOBJ_WAIT DRM_IOWR(0xC3, struct drm_syncobj_wait) +#define DRM_IOCTL_SYNCOBJ_RESET DRM_IOWR(0xC4, struct drm_syncobj_array) +#define DRM_IOCTL_SYNCOBJ_SIGNAL DRM_IOWR(0xC5, struct drm_syncobj_array) + +#define DRM_IOCTL_MODE_CREATE_LEASE DRM_IOWR(0xC6, struct drm_mode_create_lease) +#define DRM_IOCTL_MODE_LIST_LESSEES DRM_IOWR(0xC7, struct drm_mode_list_lessees) +#define DRM_IOCTL_MODE_GET_LEASE DRM_IOWR(0xC8, struct drm_mode_get_lease) +#define DRM_IOCTL_MODE_REVOKE_LEASE DRM_IOWR(0xC9, struct drm_mode_revoke_lease) + +#define DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT \ + DRM_IOWR(0xCA, struct drm_syncobj_timeline_wait) +#define DRM_IOCTL_SYNCOBJ_QUERY \ + DRM_IOWR(0xCB, struct drm_syncobj_timeline_array) +#define DRM_IOCTL_SYNCOBJ_TRANSFER DRM_IOWR(0xCC, struct drm_syncobj_transfer) +#define DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL \ + DRM_IOWR(0xCD, struct drm_syncobj_timeline_array) + +/** + * DRM_IOCTL_MODE_GETFB2 - Get framebuffer metadata. + * + * This queries metadata about a framebuffer. User-space fills + * &drm_mode_fb_cmd2.fb_id as the input, and the kernels fills the rest of the + * struct as the output. + * + * If the client is DRM master or has &CAP_SYS_ADMIN, &drm_mode_fb_cmd2.handles + * will be filled with GEM buffer handles. Fresh new GEM handles are always + * returned, even if another GEM handle referring to the same memory object + * already exists on the DRM file description. The caller is responsible for + * removing the new handles, e.g. via the &DRM_IOCTL_GEM_CLOSE IOCTL. The same + * new handle will be returned for multiple planes in case they use the same + * memory object. Planes are valid until one has a zero handle -- this can be + * used to compute the number of planes. + * + * Otherwise, &drm_mode_fb_cmd2.handles will be zeroed and planes are valid + * until one has a zero &drm_mode_fb_cmd2.pitches. + * + * If the framebuffer has a format modifier, &DRM_MODE_FB_MODIFIERS will be set + * in &drm_mode_fb_cmd2.flags and &drm_mode_fb_cmd2.modifier will contain the + * modifier. Otherwise, user-space must ignore &drm_mode_fb_cmd2.modifier. + * + * To obtain DMA-BUF FDs for each plane without leaking GEM handles, user-space + * can export each handle via &DRM_IOCTL_PRIME_HANDLE_TO_FD, then immediately + * close each unique handle via &DRM_IOCTL_GEM_CLOSE, making sure to not + * double-close handles which are specified multiple times in the array. + */ +#define DRM_IOCTL_MODE_GETFB2 DRM_IOWR(0xCE, struct drm_mode_fb_cmd2) + +#define DRM_IOCTL_SYNCOBJ_EVENTFD DRM_IOWR(0xCF, struct drm_syncobj_eventfd) + +/** + * DRM_IOCTL_MODE_CLOSEFB - Close a framebuffer. + * + * This closes a framebuffer previously added via ADDFB/ADDFB2. The IOCTL + * argument is a framebuffer object ID. + * + * This IOCTL is similar to &DRM_IOCTL_MODE_RMFB, except it doesn't disable + * planes and CRTCs. As long as the framebuffer is used by a plane, it's kept + * alive. When the plane no longer uses the framebuffer (because the + * framebuffer is replaced with another one, or the plane is disabled), the + * framebuffer is cleaned up. + * + * This is useful to implement flicker-free transitions between two processes. + * + * Depending on the threat model, user-space may want to ensure that the + * framebuffer doesn't expose any sensitive user information: closed + * framebuffers attached to a plane can be read back by the next DRM master. + */ +#define DRM_IOCTL_MODE_CLOSEFB DRM_IOWR(0xD0, struct drm_mode_closefb) + +/** + * DRM_IOCTL_SET_CLIENT_NAME - Attach a name to a drm_file + * + * Having a name allows for easier tracking and debugging. + * The length of the name (without null ending char) must be + * <= DRM_CLIENT_NAME_MAX_LEN. + * The call will fail if the name contains whitespaces or non-printable chars. + */ +#define DRM_IOCTL_SET_CLIENT_NAME DRM_IOWR(0xD1, struct drm_set_client_name) + +/* + * Device specific ioctls should only be in their respective headers + * The device specific ioctl range is from 0x40 to 0x9f. + * Generic IOCTLS restart at 0xA0. + * + * \sa drmCommandNone(), drmCommandRead(), drmCommandWrite(), and + * drmCommandReadWrite(). + */ +#define DRM_COMMAND_BASE 0x40 +#define DRM_COMMAND_END 0xA0 + +/** + * struct drm_event - Header for DRM events + * @type: event type. + * @length: total number of payload bytes (including header). + * + * This struct is a header for events written back to user-space on the DRM FD. + * A read on the DRM FD will always only return complete events: e.g. if the + * read buffer is 100 bytes large and there are two 64 byte events pending, + * only one will be returned. + * + * Event types 0 - 0x7fffffff are generic DRM events, 0x80000000 and + * up are chipset specific. Generic DRM events include &DRM_EVENT_VBLANK, + * &DRM_EVENT_FLIP_COMPLETE and &DRM_EVENT_CRTC_SEQUENCE. + */ +struct drm_event { + __u32 type; + __u32 length; +}; + +/** + * DRM_EVENT_VBLANK - vertical blanking event + * + * This event is sent in response to &DRM_IOCTL_WAIT_VBLANK with the + * &_DRM_VBLANK_EVENT flag set. + * + * The event payload is a struct drm_event_vblank. + */ +#define DRM_EVENT_VBLANK 0x01 +/** + * DRM_EVENT_FLIP_COMPLETE - page-flip completion event + * + * This event is sent in response to an atomic commit or legacy page-flip with + * the &DRM_MODE_PAGE_FLIP_EVENT flag set. + * + * The event payload is a struct drm_event_vblank. + */ +#define DRM_EVENT_FLIP_COMPLETE 0x02 +/** + * DRM_EVENT_CRTC_SEQUENCE - CRTC sequence event + * + * This event is sent in response to &DRM_IOCTL_CRTC_QUEUE_SEQUENCE. + * + * The event payload is a struct drm_event_crtc_sequence. + */ +#define DRM_EVENT_CRTC_SEQUENCE 0x03 + +struct drm_event_vblank { + struct drm_event base; + __u64 user_data; + __u32 tv_sec; + __u32 tv_usec; + __u32 sequence; + __u32 crtc_id; /* 0 on older kernels that do not support this */ +}; + +/* Event delivered at sequence. Time stamp marks when the first pixel + * of the refresh cycle leaves the display engine for the display + */ +struct drm_event_crtc_sequence { + struct drm_event base; + __u64 user_data; + __s64 time_ns; + __u64 sequence; +}; + +/* typedef area */ +typedef struct drm_clip_rect drm_clip_rect_t; +typedef struct drm_drawable_info drm_drawable_info_t; +typedef struct drm_tex_region drm_tex_region_t; +typedef struct drm_hw_lock drm_hw_lock_t; +typedef struct drm_version drm_version_t; +typedef struct drm_unique drm_unique_t; +typedef struct drm_list drm_list_t; +typedef struct drm_block drm_block_t; +typedef struct drm_control drm_control_t; +typedef enum drm_map_type drm_map_type_t; +typedef enum drm_map_flags drm_map_flags_t; +typedef struct drm_ctx_priv_map drm_ctx_priv_map_t; +typedef struct drm_map drm_map_t; +typedef struct drm_client drm_client_t; +typedef enum drm_stat_type drm_stat_type_t; +typedef struct drm_stats drm_stats_t; +typedef enum drm_lock_flags drm_lock_flags_t; +typedef struct drm_lock drm_lock_t; +typedef enum drm_dma_flags drm_dma_flags_t; +typedef struct drm_buf_desc drm_buf_desc_t; +typedef struct drm_buf_info drm_buf_info_t; +typedef struct drm_buf_free drm_buf_free_t; +typedef struct drm_buf_pub drm_buf_pub_t; +typedef struct drm_buf_map drm_buf_map_t; +typedef struct drm_dma drm_dma_t; +typedef union drm_wait_vblank drm_wait_vblank_t; +typedef struct drm_agp_mode drm_agp_mode_t; +typedef enum drm_ctx_flags drm_ctx_flags_t; +typedef struct drm_ctx drm_ctx_t; +typedef struct drm_ctx_res drm_ctx_res_t; +typedef struct drm_draw drm_draw_t; +typedef struct drm_update_draw drm_update_draw_t; +typedef struct drm_auth drm_auth_t; +typedef struct drm_irq_busid drm_irq_busid_t; +typedef enum drm_vblank_seq_type drm_vblank_seq_type_t; + +typedef struct drm_agp_buffer drm_agp_buffer_t; +typedef struct drm_agp_binding drm_agp_binding_t; +typedef struct drm_agp_info drm_agp_info_t; +typedef struct drm_scatter_gather drm_scatter_gather_t; +typedef struct drm_set_version drm_set_version_t; + +#if defined(__cplusplus) +} +#endif + +#endif diff --git a/include/arch/x86_64/drm/drm_fourcc.h b/include/arch/x86_64/drm/drm_fourcc.h new file mode 100644 index 00000000..b37e02b3 --- /dev/null +++ b/include/arch/x86_64/drm/drm_fourcc.h @@ -0,0 +1,2002 @@ +/* + * Copyright 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef DRM_FOURCC_H +#define DRM_FOURCC_H + +#include "drm.h" + +#if defined(__cplusplus) +extern "C" { +#endif + +/** + * DOC: overview + * + * In the DRM subsystem, framebuffer pixel formats are described using the + * fourcc codes defined in `include/uapi/drm/drm_fourcc.h`. In addition to the + * fourcc code, a Format Modifier may optionally be provided, in order to + * further describe the buffer's format - for example tiling or compression. + * + * Format Modifiers + * ---------------- + * + * Format modifiers are used in conjunction with a fourcc code, forming a + * unique fourcc:modifier pair. This format:modifier pair must fully define the + * format and data layout of the buffer, and should be the only way to describe + * that particular buffer. + * + * Having multiple fourcc:modifier pairs which describe the same layout should + * be avoided, as such aliases run the risk of different drivers exposing + * different names for the same data format, forcing userspace to understand + * that they are aliases. + * + * Format modifiers may change any property of the buffer, including the number + * of planes and/or the required allocation size. Format modifiers are + * vendor-namespaced, and as such the relationship between a fourcc code and a + * modifier is specific to the modifier being used. For example, some modifiers + * may preserve meaning - such as number of planes - from the fourcc code, + * whereas others may not. + * + * Modifiers must uniquely encode buffer layout. In other words, a buffer must + * match only a single modifier. A modifier must not be a subset of layouts of + * another modifier. For instance, it's incorrect to encode pitch alignment in + * a modifier: a buffer may match a 64-pixel aligned modifier and a 32-pixel + * aligned modifier. That said, modifiers can have implicit minimal + * requirements. + * + * For modifiers where the combination of fourcc code and modifier can alias, + * a canonical pair needs to be defined and used by all drivers. Preferred + * combinations are also encouraged where all combinations might lead to + * confusion and unnecessarily reduced interoperability. An example for the + * latter is AFBC, where the ABGR layouts are preferred over ARGB layouts. + * + * There are two kinds of modifier users: + * + * - Kernel and user-space drivers: for drivers it's important that modifiers + * don't alias, otherwise two drivers might support the same format but use + * different aliases, preventing them from sharing buffers in an efficient + * format. + * - Higher-level programs interfacing with KMS/GBM/EGL/Vulkan/etc: these users + * see modifiers as opaque tokens they can check for equality and intersect. + * These users mustn't need to know to reason about the modifier value + * (i.e. they are not expected to extract information out of the modifier). + * + * Vendors should document their modifier usage in as much detail as + * possible, to ensure maximum compatibility across devices, drivers and + * applications. + * + * The authoritative list of format modifier codes is found in + * `include/uapi/drm/drm_fourcc.h` + * + * Open Source User Waiver + * ----------------------- + * + * Because this is the authoritative source for pixel formats and modifiers + * referenced by GL, Vulkan extensions and other standards and hence used both + * by open source and closed source driver stacks, the usual requirement for an + * upstream in-kernel or open source userspace user does not apply. + * + * To ensure, as much as feasible, compatibility across stacks and avoid + * confusion with incompatible enumerations stakeholders for all relevant driver + * stacks should approve additions. + */ + +#define fourcc_code(a, b, c, d) \ + ((__u32)(a) | ((__u32)(b) << 8) | ((__u32)(c) << 16) | \ + ((__u32)(d) << 24)) + +#define DRM_FORMAT_BIG_ENDIAN \ + (1U << 31) /* format is big endian instead of little endian */ + +/* Reserve 0 for the invalid format specifier */ +#define DRM_FORMAT_INVALID 0 + +/* color index */ +#define DRM_FORMAT_C1 \ + fourcc_code('C', '1', ' ', ' ') /* [7:0] C0:C1:C2:C3:C4:C5:C6:C7 \ + 1:1:1:1:1:1:1:1 eight pixels/byte \ + */ +#define DRM_FORMAT_C2 \ + fourcc_code('C', '2', ' ', ' ') /* [7:0] C0:C1:C2:C3 2:2:2:2 four \ + pixels/byte */ +#define DRM_FORMAT_C4 \ + fourcc_code('C', '4', ' ', ' ') /* [7:0] C0:C1 4:4 two pixels/byte */ +#define DRM_FORMAT_C8 fourcc_code('C', '8', ' ', ' ') /* [7:0] C */ + +/* 1 bpp Darkness (inverse relationship between channel value and brightness) */ +#define DRM_FORMAT_D1 \ + fourcc_code('D', '1', ' ', ' ') /* [7:0] D0:D1:D2:D3:D4:D5:D6:D7 \ + 1:1:1:1:1:1:1:1 eight pixels/byte \ + */ + +/* 2 bpp Darkness (inverse relationship between channel value and brightness) */ +#define DRM_FORMAT_D2 \ + fourcc_code('D', '2', ' ', ' ') /* [7:0] D0:D1:D2:D3 2:2:2:2 four \ + pixels/byte */ + +/* 4 bpp Darkness (inverse relationship between channel value and brightness) */ +#define DRM_FORMAT_D4 \ + fourcc_code('D', '4', ' ', ' ') /* [7:0] D0:D1 4:4 two pixels/byte */ + +/* 8 bpp Darkness (inverse relationship between channel value and brightness) */ +#define DRM_FORMAT_D8 fourcc_code('D', '8', ' ', ' ') /* [7:0] D */ + +/* 1 bpp Red (direct relationship between channel value and brightness) */ +#define DRM_FORMAT_R1 \ + fourcc_code('R', '1', ' ', ' ') /* [7:0] R0:R1:R2:R3:R4:R5:R6:R7 \ + 1:1:1:1:1:1:1:1 eight pixels/byte \ + */ + +/* 2 bpp Red (direct relationship between channel value and brightness) */ +#define DRM_FORMAT_R2 \ + fourcc_code('R', '2', ' ', ' ') /* [7:0] R0:R1:R2:R3 2:2:2:2 four \ + pixels/byte */ + +/* 4 bpp Red (direct relationship between channel value and brightness) */ +#define DRM_FORMAT_R4 \ + fourcc_code('R', '4', ' ', ' ') /* [7:0] R0:R1 4:4 two pixels/byte */ + +/* 8 bpp Red (direct relationship between channel value and brightness) */ +#define DRM_FORMAT_R8 fourcc_code('R', '8', ' ', ' ') /* [7:0] R */ + +/* 10 bpp Red (direct relationship between channel value and brightness) */ +#define DRM_FORMAT_R10 \ + fourcc_code('R', '1', '0', ' ') /* [15:0] x:R 6:10 little endian */ + +/* 12 bpp Red (direct relationship between channel value and brightness) */ +#define DRM_FORMAT_R12 \ + fourcc_code('R', '1', '2', ' ') /* [15:0] x:R 4:12 little endian */ + +/* 16 bpp Red (direct relationship between channel value and brightness) */ +#define DRM_FORMAT_R16 \ + fourcc_code('R', '1', '6', ' ') /* [15:0] R little endian */ + +/* 16 bpp RG */ +#define DRM_FORMAT_RG88 \ + fourcc_code('R', 'G', '8', '8') /* [15:0] R:G 8:8 little endian */ +#define DRM_FORMAT_GR88 \ + fourcc_code('G', 'R', '8', '8') /* [15:0] G:R 8:8 little endian */ + +/* 32 bpp RG */ +#define DRM_FORMAT_RG1616 \ + fourcc_code('R', 'G', '3', '2') /* [31:0] R:G 16:16 little endian */ +#define DRM_FORMAT_GR1616 \ + fourcc_code('G', 'R', '3', '2') /* [31:0] G:R 16:16 little endian */ + +/* 8 bpp RGB */ +#define DRM_FORMAT_RGB332 \ + fourcc_code('R', 'G', 'B', '8') /* [7:0] R:G:B 3:3:2 */ +#define DRM_FORMAT_BGR233 \ + fourcc_code('B', 'G', 'R', '8') /* [7:0] B:G:R 2:3:3 */ + +/* 16 bpp RGB */ +#define DRM_FORMAT_XRGB4444 \ + fourcc_code('X', 'R', '1', '2') /* [15:0] x:R:G:B 4:4:4:4 little \ + endian */ +#define DRM_FORMAT_XBGR4444 \ + fourcc_code('X', 'B', '1', '2') /* [15:0] x:B:G:R 4:4:4:4 little \ + endian */ +#define DRM_FORMAT_RGBX4444 \ + fourcc_code('R', 'X', '1', '2') /* [15:0] R:G:B:x 4:4:4:4 little \ + endian */ +#define DRM_FORMAT_BGRX4444 \ + fourcc_code('B', 'X', '1', '2') /* [15:0] B:G:R:x 4:4:4:4 little \ + endian */ + +#define DRM_FORMAT_ARGB4444 \ + fourcc_code('A', 'R', '1', '2') /* [15:0] A:R:G:B 4:4:4:4 little \ + endian */ +#define DRM_FORMAT_ABGR4444 \ + fourcc_code('A', 'B', '1', '2') /* [15:0] A:B:G:R 4:4:4:4 little \ + endian */ +#define DRM_FORMAT_RGBA4444 \ + fourcc_code('R', 'A', '1', '2') /* [15:0] R:G:B:A 4:4:4:4 little \ + endian */ +#define DRM_FORMAT_BGRA4444 \ + fourcc_code('B', 'A', '1', '2') /* [15:0] B:G:R:A 4:4:4:4 little \ + endian */ + +#define DRM_FORMAT_XRGB1555 \ + fourcc_code('X', 'R', '1', '5') /* [15:0] x:R:G:B 1:5:5:5 little \ + endian */ +#define DRM_FORMAT_XBGR1555 \ + fourcc_code('X', 'B', '1', '5') /* [15:0] x:B:G:R 1:5:5:5 little \ + endian */ +#define DRM_FORMAT_RGBX5551 \ + fourcc_code('R', 'X', '1', '5') /* [15:0] R:G:B:x 5:5:5:1 little \ + endian */ +#define DRM_FORMAT_BGRX5551 \ + fourcc_code('B', 'X', '1', '5') /* [15:0] B:G:R:x 5:5:5:1 little \ + endian */ + +#define DRM_FORMAT_ARGB1555 \ + fourcc_code('A', 'R', '1', '5') /* [15:0] A:R:G:B 1:5:5:5 little \ + endian */ +#define DRM_FORMAT_ABGR1555 \ + fourcc_code('A', 'B', '1', '5') /* [15:0] A:B:G:R 1:5:5:5 little \ + endian */ +#define DRM_FORMAT_RGBA5551 \ + fourcc_code('R', 'A', '1', '5') /* [15:0] R:G:B:A 5:5:5:1 little \ + endian */ +#define DRM_FORMAT_BGRA5551 \ + fourcc_code('B', 'A', '1', '5') /* [15:0] B:G:R:A 5:5:5:1 little \ + endian */ + +#define DRM_FORMAT_RGB565 \ + fourcc_code('R', 'G', '1', '6') /* [15:0] R:G:B 5:6:5 little endian */ +#define DRM_FORMAT_BGR565 \ + fourcc_code('B', 'G', '1', '6') /* [15:0] B:G:R 5:6:5 little endian */ + +/* 24 bpp RGB */ +#define DRM_FORMAT_RGB888 \ + fourcc_code('R', 'G', '2', '4') /* [23:0] R:G:B little endian */ +#define DRM_FORMAT_BGR888 \ + fourcc_code('B', 'G', '2', '4') /* [23:0] B:G:R little endian */ + +/* 32 bpp RGB */ +#define DRM_FORMAT_XRGB8888 \ + fourcc_code('X', 'R', '2', '4') /* [31:0] x:R:G:B 8:8:8:8 little \ + endian */ +#define DRM_FORMAT_XBGR8888 \ + fourcc_code('X', 'B', '2', '4') /* [31:0] x:B:G:R 8:8:8:8 little \ + endian */ +#define DRM_FORMAT_RGBX8888 \ + fourcc_code('R', 'X', '2', '4') /* [31:0] R:G:B:x 8:8:8:8 little \ + endian */ +#define DRM_FORMAT_BGRX8888 \ + fourcc_code('B', 'X', '2', '4') /* [31:0] B:G:R:x 8:8:8:8 little \ + endian */ + +#define DRM_FORMAT_ARGB8888 \ + fourcc_code('A', 'R', '2', '4') /* [31:0] A:R:G:B 8:8:8:8 little \ + endian */ +#define DRM_FORMAT_ABGR8888 \ + fourcc_code('A', 'B', '2', '4') /* [31:0] A:B:G:R 8:8:8:8 little \ + endian */ +#define DRM_FORMAT_RGBA8888 \ + fourcc_code('R', 'A', '2', '4') /* [31:0] R:G:B:A 8:8:8:8 little \ + endian */ +#define DRM_FORMAT_BGRA8888 \ + fourcc_code('B', 'A', '2', '4') /* [31:0] B:G:R:A 8:8:8:8 little \ + endian */ + +#define DRM_FORMAT_XRGB2101010 \ + fourcc_code('X', 'R', '3', '0') /* [31:0] x:R:G:B 2:10:10:10 little \ + endian */ +#define DRM_FORMAT_XBGR2101010 \ + fourcc_code('X', 'B', '3', '0') /* [31:0] x:B:G:R 2:10:10:10 little \ + endian */ +#define DRM_FORMAT_RGBX1010102 \ + fourcc_code('R', 'X', '3', '0') /* [31:0] R:G:B:x 10:10:10:2 little \ + endian */ +#define DRM_FORMAT_BGRX1010102 \ + fourcc_code('B', 'X', '3', '0') /* [31:0] B:G:R:x 10:10:10:2 little \ + endian */ + +#define DRM_FORMAT_ARGB2101010 \ + fourcc_code('A', 'R', '3', '0') /* [31:0] A:R:G:B 2:10:10:10 little \ + endian */ +#define DRM_FORMAT_ABGR2101010 \ + fourcc_code('A', 'B', '3', '0') /* [31:0] A:B:G:R 2:10:10:10 little \ + endian */ +#define DRM_FORMAT_RGBA1010102 \ + fourcc_code('R', 'A', '3', '0') /* [31:0] R:G:B:A 10:10:10:2 little \ + endian */ +#define DRM_FORMAT_BGRA1010102 \ + fourcc_code('B', 'A', '3', '0') /* [31:0] B:G:R:A 10:10:10:2 little \ + endian */ + +/* 48 bpp RGB */ +#define DRM_FORMAT_RGB161616 \ + fourcc_code('R', 'G', '4', '8') /* [47:0] R:G:B 16:16:16 little endian \ + */ +#define DRM_FORMAT_BGR161616 \ + fourcc_code('B', 'G', '4', '8') /* [47:0] B:G:R 16:16:16 little endian \ + */ + +/* 64 bpp RGB */ +#define DRM_FORMAT_XRGB16161616 \ + fourcc_code('X', 'R', '4', '8') /* [63:0] x:R:G:B 16:16:16:16 little \ + endian */ +#define DRM_FORMAT_XBGR16161616 \ + fourcc_code('X', 'B', '4', '8') /* [63:0] x:B:G:R 16:16:16:16 little \ + endian */ + +#define DRM_FORMAT_ARGB16161616 \ + fourcc_code('A', 'R', '4', '8') /* [63:0] A:R:G:B 16:16:16:16 little \ + endian */ +#define DRM_FORMAT_ABGR16161616 \ + fourcc_code('A', 'B', '4', '8') /* [63:0] A:B:G:R 16:16:16:16 little \ + endian */ + +/* + * Half-Floating point - 16b/component + * IEEE 754-2008 binary16 half-precision float + * [15:0] sign:exponent:mantissa 1:5:10 + */ +#define DRM_FORMAT_XRGB16161616F \ + fourcc_code('X', 'R', '4', 'H') /* [63:0] x:R:G:B 16:16:16:16 little \ + endian */ +#define DRM_FORMAT_XBGR16161616F \ + fourcc_code('X', 'B', '4', 'H') /* [63:0] x:B:G:R 16:16:16:16 little \ + endian */ + +#define DRM_FORMAT_ARGB16161616F \ + fourcc_code('A', 'R', '4', 'H') /* [63:0] A:R:G:B 16:16:16:16 little \ + endian */ +#define DRM_FORMAT_ABGR16161616F \ + fourcc_code('A', 'B', '4', 'H') /* [63:0] A:B:G:R 16:16:16:16 little \ + endian */ + +#define DRM_FORMAT_R16F \ + fourcc_code('R', ' ', ' ', 'H') /* [15:0] R 16 little endian */ +#define DRM_FORMAT_GR1616F \ + fourcc_code('G', 'R', ' ', 'H') /* [31:0] G:R 16:16 little endian */ +#define DRM_FORMAT_BGR161616F \ + fourcc_code('B', 'G', 'R', 'H') /* [47:0] B:G:R 16:16:16 little endian \ + */ + +/* + * Floating point - 32b/component + * IEEE 754-2008 binary32 float + * [31:0] sign:exponent:mantissa 1:8:23 + */ +#define DRM_FORMAT_R32F \ + fourcc_code('R', ' ', ' ', 'F') /* [31:0] R 32 little endian */ +#define DRM_FORMAT_GR3232F \ + fourcc_code('G', 'R', ' ', 'F') /* [63:0] R:G 32:32 little endian */ +#define DRM_FORMAT_BGR323232F \ + fourcc_code('B', 'G', 'R', 'F') /* [95:0] R:G:B 32:32:32 little endian \ + */ +#define DRM_FORMAT_ABGR32323232F \ + fourcc_code('A', 'B', '8', 'F') /* [127:0] R:G:B:A 32:32:32:32 little \ + endian */ + +/* + * RGBA format with 10-bit components packed in 64-bit per pixel, with 6 bits + * of unused padding per component: + */ +#define DRM_FORMAT_AXBXGXRX106106106106 \ + fourcc_code('A', 'B', '1', '0') /* [63:0] A:x:B:x:G:x:R:x \ + 10:6:10:6:10:6:10:6 little endian \ + */ + +/* packed YCbCr */ +#define DRM_FORMAT_YUYV \ + fourcc_code('Y', 'U', 'Y', 'V') /* [31:0] Cr0:Y1:Cb0:Y0 8:8:8:8 little \ + endian */ +#define DRM_FORMAT_YVYU \ + fourcc_code('Y', 'V', 'Y', 'U') /* [31:0] Cb0:Y1:Cr0:Y0 8:8:8:8 little \ + endian */ +#define DRM_FORMAT_UYVY \ + fourcc_code('U', 'Y', 'V', 'Y') /* [31:0] Y1:Cr0:Y0:Cb0 8:8:8:8 little \ + endian */ +#define DRM_FORMAT_VYUY \ + fourcc_code('V', 'Y', 'U', 'Y') /* [31:0] Y1:Cb0:Y0:Cr0 8:8:8:8 little \ + endian */ + +#define DRM_FORMAT_AYUV \ + fourcc_code('A', 'Y', 'U', 'V') /* [31:0] A:Y:Cb:Cr 8:8:8:8 little \ + endian */ +#define DRM_FORMAT_AVUY8888 \ + fourcc_code('A', 'V', 'U', 'Y') /* [31:0] A:Cr:Cb:Y 8:8:8:8 little \ + endian */ +#define DRM_FORMAT_XYUV8888 \ + fourcc_code('X', 'Y', 'U', 'V') /* [31:0] X:Y:Cb:Cr 8:8:8:8 little \ + endian */ +#define DRM_FORMAT_XVUY8888 \ + fourcc_code('X', 'V', 'U', 'Y') /* [31:0] X:Cr:Cb:Y 8:8:8:8 little \ + endian */ +#define DRM_FORMAT_VUY888 \ + fourcc_code('V', 'U', '2', '4') /* [23:0] Cr:Cb:Y 8:8:8 little endian \ + */ +#define DRM_FORMAT_VUY101010 \ + fourcc_code('V', 'U', '3', '0') /* Y followed by U then V, 10:10:10. \ + Non-linear modifier only */ + +/* + * packed Y2xx indicate for each component, xx valid data occupy msb + * 16-xx padding occupy lsb + */ +#define DRM_FORMAT_Y210 \ + fourcc_code('Y', '2', '1', '0') /* [63:0] Cr0:0:Y1:0:Cb0:0:Y0:0 \ + 10:6:10:6:10:6:10:6 little endian \ + per 2 Y pixels */ +#define DRM_FORMAT_Y212 \ + fourcc_code('Y', '2', '1', '2') /* [63:0] Cr0:0:Y1:0:Cb0:0:Y0:0 \ + 12:4:12:4:12:4:12:4 little endian \ + per 2 Y pixels */ +#define DRM_FORMAT_Y216 \ + fourcc_code('Y', '2', '1', '6') /* [63:0] Cr0:Y1:Cb0:Y0 16:16:16:16 \ + little endian per 2 Y pixels */ + +/* + * packed Y4xx indicate for each component, xx valid data occupy msb + * 16-xx padding occupy lsb except Y410 + */ +#define DRM_FORMAT_Y410 \ + fourcc_code('Y', '4', '1', '0') /* [31:0] A:Cr:Y:Cb 2:10:10:10 little \ + endian */ +#define DRM_FORMAT_Y412 \ + fourcc_code('Y', '4', '1', '2') /* [63:0] A:0:Cr:0:Y:0:Cb:0 \ + 12:4:12:4:12:4:12:4 little endian \ + */ +#define DRM_FORMAT_Y416 \ + fourcc_code('Y', '4', '1', '6') /* [63:0] A:Cr:Y:Cb 16:16:16:16 little \ + endian */ + +#define DRM_FORMAT_XVYU2101010 \ + fourcc_code('X', 'V', '3', '0') /* [31:0] X:Cr:Y:Cb 2:10:10:10 little \ + endian */ +#define DRM_FORMAT_XVYU12_16161616 \ + fourcc_code('X', 'V', '3', '6') /* [63:0] X:0:Cr:0:Y:0:Cb:0 \ + 12:4:12:4:12:4:12:4 little endian \ + */ +#define DRM_FORMAT_XVYU16161616 \ + fourcc_code('X', 'V', '4', '8') /* [63:0] X:Cr:Y:Cb 16:16:16:16 little \ + endian */ + +/* + * packed YCbCr420 2x2 tiled formats + * first 64 bits will contain Y,Cb,Cr components for a 2x2 tile + */ +/* [63:0] A3:A2:Y3:0:Cr0:0:Y2:0:A1:A0:Y1:0:Cb0:0:Y0:0 + * 1:1:8:2:8:2:8:2:1:1:8:2:8:2:8:2 little endian */ +#define DRM_FORMAT_Y0L0 fourcc_code('Y', '0', 'L', '0') +/* [63:0] X3:X2:Y3:0:Cr0:0:Y2:0:X1:X0:Y1:0:Cb0:0:Y0:0 + * 1:1:8:2:8:2:8:2:1:1:8:2:8:2:8:2 little endian */ +#define DRM_FORMAT_X0L0 fourcc_code('X', '0', 'L', '0') + +/* [63:0] A3:A2:Y3:Cr0:Y2:A1:A0:Y1:Cb0:Y0 1:1:10:10:10:1:1:10:10:10 little + * endian */ +#define DRM_FORMAT_Y0L2 fourcc_code('Y', '0', 'L', '2') +/* [63:0] X3:X2:Y3:Cr0:Y2:X1:X0:Y1:Cb0:Y0 1:1:10:10:10:1:1:10:10:10 little + * endian */ +#define DRM_FORMAT_X0L2 fourcc_code('X', '0', 'L', '2') + +/* + * 1-plane YUV 4:2:0 + * In these formats, the component ordering is specified (Y, followed by U + * then V), but the exact Linear layout is undefined. + * These formats can only be used with a non-Linear modifier. + */ +#define DRM_FORMAT_YUV420_8BIT fourcc_code('Y', 'U', '0', '8') +#define DRM_FORMAT_YUV420_10BIT fourcc_code('Y', 'U', '1', '0') + +/* + * 2 plane RGB + A + * index 0 = RGB plane, same format as the corresponding non _A8 format has + * index 1 = A plane, [7:0] A + */ +#define DRM_FORMAT_XRGB8888_A8 fourcc_code('X', 'R', 'A', '8') +#define DRM_FORMAT_XBGR8888_A8 fourcc_code('X', 'B', 'A', '8') +#define DRM_FORMAT_RGBX8888_A8 fourcc_code('R', 'X', 'A', '8') +#define DRM_FORMAT_BGRX8888_A8 fourcc_code('B', 'X', 'A', '8') +#define DRM_FORMAT_RGB888_A8 fourcc_code('R', '8', 'A', '8') +#define DRM_FORMAT_BGR888_A8 fourcc_code('B', '8', 'A', '8') +#define DRM_FORMAT_RGB565_A8 fourcc_code('R', '5', 'A', '8') +#define DRM_FORMAT_BGR565_A8 fourcc_code('B', '5', 'A', '8') + +/* + * 2 plane YCbCr + * index 0 = Y plane, [7:0] Y + * index 1 = Cr:Cb plane, [15:0] Cr:Cb little endian + * or + * index 1 = Cb:Cr plane, [15:0] Cb:Cr little endian + */ +#define DRM_FORMAT_NV12 \ + fourcc_code('N', 'V', '1', '2') /* 2x2 subsampled Cr:Cb plane */ +#define DRM_FORMAT_NV21 \ + fourcc_code('N', 'V', '2', '1') /* 2x2 subsampled Cb:Cr plane */ +#define DRM_FORMAT_NV16 \ + fourcc_code('N', 'V', '1', '6') /* 2x1 subsampled Cr:Cb plane */ +#define DRM_FORMAT_NV61 \ + fourcc_code('N', 'V', '6', '1') /* 2x1 subsampled Cb:Cr plane */ +#define DRM_FORMAT_NV24 \ + fourcc_code('N', 'V', '2', '4') /* non-subsampled Cr:Cb plane */ +#define DRM_FORMAT_NV42 \ + fourcc_code('N', 'V', '4', '2') /* non-subsampled Cb:Cr plane */ +/* + * 2 plane YCbCr + * index 0 = Y plane, [39:0] Y3:Y2:Y1:Y0 little endian + * index 1 = Cr:Cb plane, [39:0] Cr1:Cb1:Cr0:Cb0 little endian + */ +#define DRM_FORMAT_NV15 \ + fourcc_code('N', 'V', '1', '5') /* 2x2 subsampled Cr:Cb plane */ +#define DRM_FORMAT_NV20 \ + fourcc_code('N', 'V', '2', '0') /* 2x1 subsampled Cr:Cb plane */ +#define DRM_FORMAT_NV30 \ + fourcc_code('N', 'V', '3', '0') /* non-subsampled Cr:Cb plane */ + +/* + * 2 plane YCbCr MSB aligned + * index 0 = Y plane, [15:0] Y:x [10:6] little endian + * index 1 = Cr:Cb plane, [31:0] Cr:x:Cb:x [10:6:10:6] little endian + */ +#define DRM_FORMAT_P210 \ + fourcc_code('P', '2', '1', '0') /* 2x1 subsampled Cr:Cb plane, 10 bit \ + per channel */ + +/* + * 2 plane YCbCr MSB aligned + * index 0 = Y plane, [15:0] Y:x [10:6] little endian + * index 1 = Cr:Cb plane, [31:0] Cr:x:Cb:x [10:6:10:6] little endian + */ +#define DRM_FORMAT_P010 \ + fourcc_code('P', '0', '1', '0') /* 2x2 subsampled Cr:Cb plane 10 bits \ + per channel */ + +/* + * 2 plane YCbCr MSB aligned + * index 0 = Y plane, [15:0] Y:x [12:4] little endian + * index 1 = Cr:Cb plane, [31:0] Cr:x:Cb:x [12:4:12:4] little endian + */ +#define DRM_FORMAT_P012 \ + fourcc_code('P', '0', '1', '2') /* 2x2 subsampled Cr:Cb plane 12 bits \ + per channel */ + +/* + * 2 plane YCbCr MSB aligned + * index 0 = Y plane, [15:0] Y little endian + * index 1 = Cr:Cb plane, [31:0] Cr:Cb [16:16] little endian + */ +#define DRM_FORMAT_P016 \ + fourcc_code('P', '0', '1', '6') /* 2x2 subsampled Cr:Cb plane 16 bits \ + per channel */ + +/* 2 plane YCbCr420. + * 3 10 bit components and 2 padding bits packed into 4 bytes. + * index 0 = Y plane, [31:0] x:Y2:Y1:Y0 2:10:10:10 little endian + * index 1 = Cr:Cb plane, [63:0] x:Cr2:Cb2:Cr1:x:Cb1:Cr0:Cb0 + * [2:10:10:10:2:10:10:10] little endian + */ +#define DRM_FORMAT_P030 \ + fourcc_code('P', '0', '3', '0') /* 2x2 subsampled Cr:Cb plane 10 bits \ + per channel packed */ + +/* 3 plane non-subsampled (444) YCbCr + * 16 bits per component, but only 10 bits are used and 6 bits are padded + * index 0: Y plane, [15:0] Y:x [10:6] little endian + * index 1: Cb plane, [15:0] Cb:x [10:6] little endian + * index 2: Cr plane, [15:0] Cr:x [10:6] little endian + */ +#define DRM_FORMAT_Q410 fourcc_code('Q', '4', '1', '0') + +/* 3 plane non-subsampled (444) YCrCb + * 16 bits per component, but only 10 bits are used and 6 bits are padded + * index 0: Y plane, [15:0] Y:x [10:6] little endian + * index 1: Cr plane, [15:0] Cr:x [10:6] little endian + * index 2: Cb plane, [15:0] Cb:x [10:6] little endian + */ +#define DRM_FORMAT_Q401 fourcc_code('Q', '4', '0', '1') + +/* + * 3 plane YCbCr LSB aligned + * In order to use these formats in a similar fashion to MSB aligned ones + * implementation can multiply the values by 2^6=64. For that reason the padding + * must only contain zeros. + * index 0 = Y plane, [15:0] z:Y [6:10] little endian + * index 1 = Cr plane, [15:0] z:Cr [6:10] little endian + * index 2 = Cb plane, [15:0] z:Cb [6:10] little endian + */ +#define DRM_FORMAT_S010 \ + fourcc_code('S', '0', '1', '0') /* 2x2 subsampled Cb (1) and Cr (2) \ + planes 10 bits per channel */ +#define DRM_FORMAT_S210 \ + fourcc_code('S', '2', '1', '0') /* 2x1 subsampled Cb (1) and Cr (2) \ + planes 10 bits per channel */ +#define DRM_FORMAT_S410 \ + fourcc_code('S', '4', '1', '0') /* non-subsampled Cb (1) and Cr (2) \ + planes 10 bits per channel */ + +/* + * 3 plane YCbCr LSB aligned + * In order to use these formats in a similar fashion to MSB aligned ones + * implementation can multiply the values by 2^4=16. For that reason the padding + * must only contain zeros. + * index 0 = Y plane, [15:0] z:Y [4:12] little endian + * index 1 = Cr plane, [15:0] z:Cr [4:12] little endian + * index 2 = Cb plane, [15:0] z:Cb [4:12] little endian + */ +#define DRM_FORMAT_S012 \ + fourcc_code('S', '0', '1', '2') /* 2x2 subsampled Cb (1) and Cr (2) \ + planes 12 bits per channel */ +#define DRM_FORMAT_S212 \ + fourcc_code('S', '2', '1', '2') /* 2x1 subsampled Cb (1) and Cr (2) \ + planes 12 bits per channel */ +#define DRM_FORMAT_S412 \ + fourcc_code('S', '4', '1', '2') /* non-subsampled Cb (1) and Cr (2) \ + planes 12 bits per channel */ + +/* + * 3 plane YCbCr + * index 0 = Y plane, [15:0] Y little endian + * index 1 = Cr plane, [15:0] Cr little endian + * index 2 = Cb plane, [15:0] Cb little endian + */ +#define DRM_FORMAT_S016 \ + fourcc_code('S', '0', '1', '6') /* 2x2 subsampled Cb (1) and Cr (2) \ + planes 16 bits per channel */ +#define DRM_FORMAT_S216 \ + fourcc_code('S', '2', '1', '6') /* 2x1 subsampled Cb (1) and Cr (2) \ + planes 16 bits per channel */ +#define DRM_FORMAT_S416 \ + fourcc_code('S', '4', '1', '6') /* non-subsampled Cb (1) and Cr (2) \ + planes 16 bits per channel */ + +/* + * 3 plane YCbCr + * index 0: Y plane, [7:0] Y + * index 1: Cb plane, [7:0] Cb + * index 2: Cr plane, [7:0] Cr + * or + * index 1: Cr plane, [7:0] Cr + * index 2: Cb plane, [7:0] Cb + */ +#define DRM_FORMAT_YUV410 \ + fourcc_code('Y', 'U', 'V', '9') /* 4x4 subsampled Cb (1) and Cr (2) \ + planes */ +#define DRM_FORMAT_YVU410 \ + fourcc_code('Y', 'V', 'U', '9') /* 4x4 subsampled Cr (1) and Cb (2) \ + planes */ +#define DRM_FORMAT_YUV411 \ + fourcc_code('Y', 'U', '1', '1') /* 4x1 subsampled Cb (1) and Cr (2) \ + planes */ +#define DRM_FORMAT_YVU411 \ + fourcc_code('Y', 'V', '1', '1') /* 4x1 subsampled Cr (1) and Cb (2) \ + planes */ +#define DRM_FORMAT_YUV420 \ + fourcc_code('Y', 'U', '1', '2') /* 2x2 subsampled Cb (1) and Cr (2) \ + planes */ +#define DRM_FORMAT_YVU420 \ + fourcc_code('Y', 'V', '1', '2') /* 2x2 subsampled Cr (1) and Cb (2) \ + planes */ +#define DRM_FORMAT_YUV422 \ + fourcc_code('Y', 'U', '1', '6') /* 2x1 subsampled Cb (1) and Cr (2) \ + planes */ +#define DRM_FORMAT_YVU422 \ + fourcc_code('Y', 'V', '1', '6') /* 2x1 subsampled Cr (1) and Cb (2) \ + planes */ +#define DRM_FORMAT_YUV444 \ + fourcc_code('Y', 'U', '2', '4') /* non-subsampled Cb (1) and Cr (2) \ + planes */ +#define DRM_FORMAT_YVU444 \ + fourcc_code('Y', 'V', '2', '4') /* non-subsampled Cr (1) and Cb (2) \ + planes */ + +/* + * Format Modifiers: + * + * Format modifiers describe, typically, a re-ordering or modification + * of the data in a plane of an FB. This can be used to express tiled/ + * swizzled formats, or compression, or a combination of the two. + * + * The upper 8 bits of the format modifier are a vendor-id as assigned + * below. The lower 56 bits are assigned as vendor sees fit. + */ + +/* Vendor Ids: */ +#define DRM_FORMAT_MOD_VENDOR_NONE 0 +#define DRM_FORMAT_MOD_VENDOR_INTEL 0x01 +#define DRM_FORMAT_MOD_VENDOR_AMD 0x02 +#define DRM_FORMAT_MOD_VENDOR_NVIDIA 0x03 +#define DRM_FORMAT_MOD_VENDOR_SAMSUNG 0x04 +#define DRM_FORMAT_MOD_VENDOR_QCOM 0x05 +#define DRM_FORMAT_MOD_VENDOR_VIVANTE 0x06 +#define DRM_FORMAT_MOD_VENDOR_BROADCOM 0x07 +#define DRM_FORMAT_MOD_VENDOR_ARM 0x08 +#define DRM_FORMAT_MOD_VENDOR_ALLWINNER 0x09 +#define DRM_FORMAT_MOD_VENDOR_AMLOGIC 0x0a +#define DRM_FORMAT_MOD_VENDOR_MTK 0x0b +#define DRM_FORMAT_MOD_VENDOR_APPLE 0x0c + +/* add more to the end as needed */ + +#define DRM_FORMAT_RESERVED ((1ULL << 56) - 1) + +#define fourcc_mod_get_vendor(modifier) (((modifier) >> 56) & 0xff) + +#define fourcc_mod_is_vendor(modifier, vendor) \ + (fourcc_mod_get_vendor(modifier) == DRM_FORMAT_MOD_VENDOR_##vendor) + +#define fourcc_mod_code(vendor, val) \ + ((((__u64)DRM_FORMAT_MOD_VENDOR_##vendor) << 56) | \ + ((val) & 0x00ffffffffffffffULL)) + +/* + * Format Modifier tokens: + * + * When adding a new token please document the layout with a code comment, + * similar to the fourcc codes above. drm_fourcc.h is considered the + * authoritative source for all of these. + * + * Generic modifier names: + * + * DRM_FORMAT_MOD_GENERIC_* definitions are used to provide vendor-neutral names + * for layouts which are common across multiple vendors. To preserve + * compatibility, in cases where a vendor-specific definition already exists and + * a generic name for it is desired, the common name is a purely symbolic alias + * and must use the same numerical value as the original definition. + * + * Note that generic names should only be used for modifiers which describe + * generic layouts (such as pixel re-ordering), which may have + * independently-developed support across multiple vendors. + * + * In future cases where a generic layout is identified before merging with a + * vendor-specific modifier, a new 'GENERIC' vendor or modifier using vendor + * 'NONE' could be considered. This should only be for obvious, exceptional + * cases to avoid polluting the 'GENERIC' namespace with modifiers which only + * apply to a single vendor. + * + * Generic names should not be used for cases where multiple hardware vendors + * have implementations of the same standardised compression scheme (such as + * AFBC). In those cases, all implementations should use the same format + * modifier(s), reflecting the vendor of the standard. + */ + +#define DRM_FORMAT_MOD_GENERIC_16_16_TILE DRM_FORMAT_MOD_SAMSUNG_16_16_TILE + +/* + * Invalid Modifier + * + * This modifier can be used as a sentinel to terminate the format modifiers + * list, or to initialize a variable with an invalid modifier. It might also be + * used to report an error back to userspace for certain APIs. + */ +#define DRM_FORMAT_MOD_INVALID fourcc_mod_code(NONE, DRM_FORMAT_RESERVED) + +/* + * Linear Layout + * + * Just plain linear layout. Note that this is different from no specifying any + * modifier (e.g. not setting DRM_MODE_FB_MODIFIERS in the DRM_ADDFB2 ioctl), + * which tells the driver to also take driver-internal information into account + * and so might actually result in a tiled framebuffer. + */ +#define DRM_FORMAT_MOD_LINEAR fourcc_mod_code(NONE, 0) + +/* + * Deprecated: use DRM_FORMAT_MOD_LINEAR instead + * + * The "none" format modifier doesn't actually mean that the modifier is + * implicit, instead it means that the layout is linear. Whether modifiers are + * used is out-of-band information carried in an API-specific way (e.g. in a + * flag for drm_mode_fb_cmd2). + */ +#define DRM_FORMAT_MOD_NONE 0 + +/* Intel framebuffer modifiers */ + +/* + * Intel X-tiling layout + * + * This is a tiled layout using 4Kb tiles (except on gen2 where the tiles 2Kb) + * in row-major layout. Within the tile bytes are laid out row-major, with + * a platform-dependent stride. On top of that the memory can apply + * platform-depending swizzling of some higher address bits into bit6. + * + * Note that this layout is only accurate on intel gen 8+ or valleyview + * chipsets. On earlier platforms the is highly platforms specific and not + * useful for cross-driver sharing. It exists since on a given platform it does + * uniquely identify the layout in a simple way for i915-specific userspace, + * which facilitated conversion of userspace to modifiers. Additionally the + * exact format on some really old platforms is not known. + */ +#define I915_FORMAT_MOD_X_TILED fourcc_mod_code(INTEL, 1) + +/* + * Intel Y-tiling layout + * + * This is a tiled layout using 4Kb tiles (except on gen2 where the tiles 2Kb) + * in row-major layout. Within the tile bytes are laid out in OWORD (16 bytes) + * chunks column-major, with a platform-dependent height. On top of that the + * memory can apply platform-depending swizzling of some higher address bits + * into bit6. + * + * Note that this layout is only accurate on intel gen 8+ or valleyview + * chipsets. On earlier platforms the is highly platforms specific and not + * useful for cross-driver sharing. It exists since on a given platform it does + * uniquely identify the layout in a simple way for i915-specific userspace, + * which facilitated conversion of userspace to modifiers. Additionally the + * exact format on some really old platforms is not known. + */ +#define I915_FORMAT_MOD_Y_TILED fourcc_mod_code(INTEL, 2) + +/* + * Intel Yf-tiling layout + * + * This is a tiled layout using 4Kb tiles in row-major layout. + * Within the tile pixels are laid out in 16 256 byte units / sub-tiles which + * are arranged in four groups (two wide, two high) with column-major layout. + * Each group therefore consists out of four 256 byte units, which are also laid + * out as 2x2 column-major. + * 256 byte units are made out of four 64 byte blocks of pixels, producing + * either a square block or a 2:1 unit. + * 64 byte blocks of pixels contain four pixel rows of 16 bytes, where the width + * in pixel depends on the pixel depth. + */ +#define I915_FORMAT_MOD_Yf_TILED fourcc_mod_code(INTEL, 3) + +/* + * Intel color control surface (CCS) for render compression + * + * The framebuffer format must be one of the 8:8:8:8 RGB formats. + * The main surface will be plane index 0 and must be Y/Yf-tiled, + * the CCS will be plane index 1. + * + * Each CCS tile matches a 1024x512 pixel area of the main surface. + * To match certain aspects of the 3D hardware the CCS is + * considered to be made up of normal 128Bx32 Y tiles, Thus + * the CCS pitch must be specified in multiples of 128 bytes. + * + * In reality the CCS tile appears to be a 64Bx64 Y tile, composed + * of QWORD (8 bytes) chunks instead of OWORD (16 bytes) chunks. + * But that fact is not relevant unless the memory is accessed + * directly. + */ +#define I915_FORMAT_MOD_Y_TILED_CCS fourcc_mod_code(INTEL, 4) +#define I915_FORMAT_MOD_Yf_TILED_CCS fourcc_mod_code(INTEL, 5) + +/* + * Intel color control surfaces (CCS) for Gen-12 render compression. + * + * The main surface is Y-tiled and at plane index 0, the CCS is linear and + * at index 1. A 64B CCS cache line corresponds to an area of 4x1 tiles in + * main surface. In other words, 4 bits in CCS map to a main surface cache + * line pair. The main surface pitch is required to be a multiple of four + * Y-tile widths. + */ +#define I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS fourcc_mod_code(INTEL, 6) + +/* + * Intel color control surfaces (CCS) for Gen-12 media compression + * + * The main surface is Y-tiled and at plane index 0, the CCS is linear and + * at index 1. A 64B CCS cache line corresponds to an area of 4x1 tiles in + * main surface. In other words, 4 bits in CCS map to a main surface cache + * line pair. The main surface pitch is required to be a multiple of four + * Y-tile widths. For semi-planar formats like NV12, CCS planes follow the + * Y and UV planes i.e., planes 0 and 1 are used for Y and UV surfaces, + * planes 2 and 3 for the respective CCS. + */ +#define I915_FORMAT_MOD_Y_TILED_GEN12_MC_CCS fourcc_mod_code(INTEL, 7) + +/* + * Intel Color Control Surface with Clear Color (CCS) for Gen-12 render + * compression. + * + * The main surface is Y-tiled and is at plane index 0 whereas CCS is linear + * and at index 1. The clear color is stored at index 2, and the pitch should + * be 64 bytes aligned. The clear color structure is 256 bits. The first 128 + * bits represents Raw Clear Color Red, Green, Blue and Alpha color each + * represented by 32 bits. The raw clear color is consumed by the 3d engine and + * generates the converted clear color of size 64 bits. The first 32 bits store + * the Lower Converted Clear Color value and the next 32 bits store the Higher + * Converted Clear Color value when applicable. The Converted Clear Color values + * are consumed by the DE. The last 64 bits are used to store Color Discard + * Enable and Depth Clear Value Valid which are ignored by the DE. A CCS cache + * line corresponds to an area of 4x1 tiles in the main surface. The main + * surface pitch is required to be a multiple of 4 tile widths. + */ +#define I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS_CC fourcc_mod_code(INTEL, 8) + +/* + * Intel Tile 4 layout + * + * This is a tiled layout using 4KB tiles in a row-major layout. It has the same + * shape as Tile Y at two granularities: 4KB (128B x 32) and 64B (16B x 4). It + * only differs from Tile Y at the 256B granularity in between. At this + * granularity, Tile Y has a shape of 16B x 32 rows, but this tiling has a shape + * of 64B x 8 rows. + */ +#define I915_FORMAT_MOD_4_TILED fourcc_mod_code(INTEL, 9) + +/* + * Intel color control surfaces (CCS) for DG2 render compression. + * + * The main surface is Tile 4 and at plane index 0. The CCS data is stored + * outside of the GEM object in a reserved memory area dedicated for the + * storage of the CCS data for all RC/RC_CC/MC compressible GEM objects. The + * main surface pitch is required to be a multiple of four Tile 4 widths. + */ +#define I915_FORMAT_MOD_4_TILED_DG2_RC_CCS fourcc_mod_code(INTEL, 10) + +/* + * Intel color control surfaces (CCS) for DG2 media compression. + * + * The main surface is Tile 4 and at plane index 0. For semi-planar formats + * like NV12, the Y and UV planes are Tile 4 and are located at plane indices + * 0 and 1, respectively. The CCS for all planes are stored outside of the + * GEM object in a reserved memory area dedicated for the storage of the + * CCS data for all RC/RC_CC/MC compressible GEM objects. The main surface + * pitch is required to be a multiple of four Tile 4 widths. + */ +#define I915_FORMAT_MOD_4_TILED_DG2_MC_CCS fourcc_mod_code(INTEL, 11) + +/* + * Intel Color Control Surface with Clear Color (CCS) for DG2 render + * compression. + * + * The main surface is Tile 4 and at plane index 0. The CCS data is stored + * outside of the GEM object in a reserved memory area dedicated for the + * storage of the CCS data for all RC/RC_CC/MC compressible GEM objects. The + * main surface pitch is required to be a multiple of four Tile 4 widths. The + * clear color is stored at plane index 1 and the pitch should be 64 bytes + * aligned. The format of the 256 bits of clear color data matches the one used + * for the I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS_CC modifier, see its description + * for details. + */ +#define I915_FORMAT_MOD_4_TILED_DG2_RC_CCS_CC fourcc_mod_code(INTEL, 12) + +/* + * Intel Color Control Surfaces (CCS) for display ver. 14 render compression. + * + * The main surface is tile4 and at plane index 0, the CCS is linear and + * at index 1. A 64B CCS cache line corresponds to an area of 4x1 tiles in + * main surface. In other words, 4 bits in CCS map to a main surface cache + * line pair. The main surface pitch is required to be a multiple of four + * tile4 widths. + */ +#define I915_FORMAT_MOD_4_TILED_MTL_RC_CCS fourcc_mod_code(INTEL, 13) + +/* + * Intel Color Control Surfaces (CCS) for display ver. 14 media compression + * + * The main surface is tile4 and at plane index 0, the CCS is linear and + * at index 1. A 64B CCS cache line corresponds to an area of 4x1 tiles in + * main surface. In other words, 4 bits in CCS map to a main surface cache + * line pair. The main surface pitch is required to be a multiple of four + * tile4 widths. For semi-planar formats like NV12, CCS planes follow the + * Y and UV planes i.e., planes 0 and 1 are used for Y and UV surfaces, + * planes 2 and 3 for the respective CCS. + */ +#define I915_FORMAT_MOD_4_TILED_MTL_MC_CCS fourcc_mod_code(INTEL, 14) + +/* + * Intel Color Control Surface with Clear Color (CCS) for display ver. 14 render + * compression. + * + * The main surface is tile4 and is at plane index 0 whereas CCS is linear + * and at index 1. The clear color is stored at index 2, and the pitch should + * be ignored. The clear color structure is 256 bits. The first 128 bits + * represents Raw Clear Color Red, Green, Blue and Alpha color each represented + * by 32 bits. The raw clear color is consumed by the 3d engine and generates + * the converted clear color of size 64 bits. The first 32 bits store the Lower + * Converted Clear Color value and the next 32 bits store the Higher Converted + * Clear Color value when applicable. The Converted Clear Color values are + * consumed by the DE. The last 64 bits are used to store Color Discard Enable + * and Depth Clear Value Valid which are ignored by the DE. A CCS cache line + * corresponds to an area of 4x1 tiles in the main surface. The main surface + * pitch is required to be a multiple of 4 tile widths. + */ +#define I915_FORMAT_MOD_4_TILED_MTL_RC_CCS_CC fourcc_mod_code(INTEL, 15) + +/* + * Intel Color Control Surfaces (CCS) for graphics ver. 20 unified compression + * on integrated graphics + * + * The main surface is Tile 4 and at plane index 0. For semi-planar formats + * like NV12, the Y and UV planes are Tile 4 and are located at plane indices + * 0 and 1, respectively. The CCS for all planes are stored outside of the + * GEM object in a reserved memory area dedicated for the storage of the + * CCS data for all compressible GEM objects. + */ +#define I915_FORMAT_MOD_4_TILED_LNL_CCS fourcc_mod_code(INTEL, 16) + +/* + * Intel Color Control Surfaces (CCS) for graphics ver. 20 unified compression + * on discrete graphics + * + * The main surface is Tile 4 and at plane index 0. For semi-planar formats + * like NV12, the Y and UV planes are Tile 4 and are located at plane indices + * 0 and 1, respectively. The CCS for all planes are stored outside of the + * GEM object in a reserved memory area dedicated for the storage of the + * CCS data for all compressible GEM objects. The GEM object must be stored in + * contiguous memory with a size aligned to 64KB + */ +#define I915_FORMAT_MOD_4_TILED_BMG_CCS fourcc_mod_code(INTEL, 17) + +/* + * Tiled, NV12MT, grouped in 64 (pixels) x 32 (lines) -sized macroblocks + * + * Macroblocks are laid in a Z-shape, and each pixel data is following the + * standard NV12 style. + * As for NV12, an image is the result of two frame buffers: one for Y, + * one for the interleaved Cb/Cr components (1/2 the height of the Y buffer). + * Alignment requirements are (for each buffer): + * - multiple of 128 pixels for the width + * - multiple of 32 pixels for the height + * + * For more information: see + * https://linuxtv.org/downloads/v4l-dvb-apis/re32.html + */ +#define DRM_FORMAT_MOD_SAMSUNG_64_32_TILE fourcc_mod_code(SAMSUNG, 1) + +/* + * Tiled, 16 (pixels) x 16 (lines) - sized macroblocks + * + * This is a simple tiled layout using tiles of 16x16 pixels in a row-major + * layout. For YCbCr formats Cb/Cr components are taken in such a way that + * they correspond to their 16x16 luma block. + */ +#define DRM_FORMAT_MOD_SAMSUNG_16_16_TILE fourcc_mod_code(SAMSUNG, 2) + +/* + * Qualcomm Compressed Format + * + * Refers to a compressed variant of the base format that is compressed. + * Implementation may be platform and base-format specific. + * + * Each macrotile consists of m x n (mostly 4 x 4) tiles. + * Pixel data pitch/stride is aligned with macrotile width. + * Pixel data height is aligned with macrotile height. + * Entire pixel data buffer is aligned with 4k(bytes). + */ +#define DRM_FORMAT_MOD_QCOM_COMPRESSED fourcc_mod_code(QCOM, 1) + +/* + * Qualcomm Tiled Format + * + * Similar to DRM_FORMAT_MOD_QCOM_COMPRESSED but not compressed. + * Implementation may be platform and base-format specific. + * + * Each macrotile consists of m x n (mostly 4 x 4) tiles. + * Pixel data pitch/stride is aligned with macrotile width. + * Pixel data height is aligned with macrotile height. + * Entire pixel data buffer is aligned with 4k(bytes). + */ +#define DRM_FORMAT_MOD_QCOM_TILED3 fourcc_mod_code(QCOM, 3) + +/* + * Qualcomm Alternate Tiled Format + * + * Alternate tiled format typically only used within GMEM. + * Implementation may be platform and base-format specific. + */ +#define DRM_FORMAT_MOD_QCOM_TILED2 fourcc_mod_code(QCOM, 2) + +/* Vivante framebuffer modifiers */ + +/* + * Vivante 4x4 tiling layout + * + * This is a simple tiled layout using tiles of 4x4 pixels in a row-major + * layout. + */ +#define DRM_FORMAT_MOD_VIVANTE_TILED fourcc_mod_code(VIVANTE, 1) + +/* + * Vivante 64x64 super-tiling layout + * + * This is a tiled layout using 64x64 pixel super-tiles, where each super-tile + * contains 8x4 groups of 2x4 tiles of 4x4 pixels (like above) each, all in row- + * major layout. + * + * For more information: see + * https://github.com/etnaviv/etna_viv/blob/master/doc/hardware.md#texture-tiling + */ +#define DRM_FORMAT_MOD_VIVANTE_SUPER_TILED fourcc_mod_code(VIVANTE, 2) + +/* + * Vivante 4x4 tiling layout for dual-pipe + * + * Same as the 4x4 tiling layout, except every second 4x4 pixel tile starts at a + * different base address. Offsets from the base addresses are therefore halved + * compared to the non-split tiled layout. + */ +#define DRM_FORMAT_MOD_VIVANTE_SPLIT_TILED fourcc_mod_code(VIVANTE, 3) + +/* + * Vivante 64x64 super-tiling layout for dual-pipe + * + * Same as the 64x64 super-tiling layout, except every second 4x4 pixel tile + * starts at a different base address. Offsets from the base addresses are + * therefore halved compared to the non-split super-tiled layout. + */ +#define DRM_FORMAT_MOD_VIVANTE_SPLIT_SUPER_TILED fourcc_mod_code(VIVANTE, 4) + +/* + * Vivante TS (tile-status) buffer modifiers. They can be combined with all of + * the color buffer tiling modifiers defined above. When TS is present it's a + * separate buffer containing the clear/compression status of each tile. The + * modifiers are defined as VIVANTE_MOD_TS_c_s, where c is the color buffer + * tile size in bytes covered by one entry in the status buffer and s is the + * number of status bits per entry. + * We reserve the top 8 bits of the Vivante modifier space for tile status + * clear/compression modifiers, as future cores might add some more TS layout + * variations. + */ +#define VIVANTE_MOD_TS_64_4 (1ULL << 48) +#define VIVANTE_MOD_TS_64_2 (2ULL << 48) +#define VIVANTE_MOD_TS_128_4 (3ULL << 48) +#define VIVANTE_MOD_TS_256_4 (4ULL << 48) +#define VIVANTE_MOD_TS_MASK (0xfULL << 48) + +/* + * Vivante compression modifiers. Those depend on a TS modifier being present + * as the TS bits get reinterpreted as compression tags instead of simple + * clear markers when compression is enabled. + */ +#define VIVANTE_MOD_COMP_DEC400 (1ULL << 52) +#define VIVANTE_MOD_COMP_MASK (0xfULL << 52) + +/* Masking out the extension bits will yield the base modifier. */ +#define VIVANTE_MOD_EXT_MASK (VIVANTE_MOD_TS_MASK | VIVANTE_MOD_COMP_MASK) + +/* NVIDIA frame buffer modifiers */ + +/* + * Tegra Tiled Layout, used by Tegra 2, 3 and 4. + * + * Pixels are arranged in simple tiles of 16 x 16 bytes. + */ +#define DRM_FORMAT_MOD_NVIDIA_TEGRA_TILED fourcc_mod_code(NVIDIA, 1) + +/* + * Generalized Block Linear layout, used by desktop GPUs starting with NV50/G80, + * and Tegra GPUs starting with Tegra K1. + * + * Pixels are arranged in Groups of Bytes (GOBs). GOB size and layout varies + * based on the architecture generation. GOBs themselves are then arranged in + * 3D blocks, with the block dimensions (in terms of GOBs) always being a power + * of two, and hence expressible as their log2 equivalent (E.g., "2" represents + * a block depth or height of "4"). + * + * Chapter 20 "Pixel Memory Formats" of the Tegra X1 TRM describes this format + * in full detail. + * + * Macro + * Bits Param Description + * ---- ----- ----------------------------------------------------------------- + * + * 3:0 h log2(height) of each block, in GOBs. Placed here for + * compatibility with the existing + * DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK()-based modifiers. + * + * 4:4 - Must be 1, to indicate block-linear layout. Necessary for + * compatibility with the existing + * DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK()-based modifiers. + * + * 8:5 - Reserved (To support 3D-surfaces with variable log2(depth) block + * size). Must be zero. + * + * Note there is no log2(width) parameter. Some portions of the + * hardware support a block width of two gobs, but it is impractical + * to use due to lack of support elsewhere, and has no known + * benefits. + * + * 11:9 - Reserved (To support 2D-array textures with variable array stride + * in blocks, specified via log2(tile width in blocks)). Must be + * zero. + * + * 19:12 k Page Kind. This value directly maps to a field in the page + * tables of all GPUs >= NV50. It affects the exact layout of bits + * in memory and can be derived from the tuple + * + * (format, GPU model, compression type, samples per pixel) + * + * Where compression type is defined below. If GPU model were + * implied by the format modifier, format, or memory buffer, page + * kind would not need to be included in the modifier itself, but + * since the modifier should define the layout of the associated + * memory buffer independent from any device or other context, it + * must be included here. + * + * 21:20 g GOB Height and Page Kind Generation. The height of a GOB changed + * starting with Fermi GPUs. Additionally, the mapping between page + * kind and bit layout has changed at various points. + * + * 0 = Gob Height 8, Fermi - Volta, Tegra K1+ Page Kind mapping + * 1 = Gob Height 4, G80 - GT2XX Page Kind mapping + * 2 = Gob Height 8, Turing+ Page Kind mapping + * 3 = Reserved for future use. + * + * 22:22 s Sector layout. There is a further bit remapping step that occurs + * 26:27 at an even lower level than the page kind and block linear + * swizzles. This causes the bit arrangement of surfaces in memory + * to differ subtly, and prevents direct sharing of surfaces between + * GPUs with different layouts. + * + * 0 = Tegra K1 - Tegra Parker/TX2 Layout + * 1 = Pre-GB20x, GB20x 32+ bpp, GB10, Tegra Xavier-Orin Layout + * 2 = GB20x(Blackwell 2)+ 8 bpp surface layout + * 3 = GB20x(Blackwell 2)+ 16 bpp surface layout + * 4 = Reserved for future use. + * 5 = Reserved for future use. + * 6 = Reserved for future use. + * 7 = Reserved for future use. + * + * 25:23 c Lossless Framebuffer Compression type. + * + * 0 = none + * 1 = ROP/3D, layout 1, exact compression format implied by Page + * Kind field + * 2 = ROP/3D, layout 2, exact compression format implied by Page + * Kind field + * 3 = CDE horizontal + * 4 = CDE vertical + * 5 = Reserved for future use + * 6 = Reserved for future use + * 7 = Reserved for future use + * + * 55:28 - Reserved for future use. Must be zero. + */ +#define DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(c, s, g, k, h) \ + fourcc_mod_code(NVIDIA, (0x10 | ((h) & 0xf) | (((k) & 0xff) << 12) | \ + (((g) & 0x3) << 20) | (((s) & 0x1) << 22) | \ + (((s) & 0x6) << 25) | (((c) & 0x7) << 23))) + +/* To grandfather in prior block linear format modifiers to the above layout, + * the page kind "0", which corresponds to "pitch/linear" and hence is unusable + * with block-linear layouts, is remapped within drivers to the value 0xfe, + * which corresponds to the "generic" kind used for simple single-sample + * uncompressed color formats on Fermi - Volta GPUs. + */ +static __inline__ __u64 +drm_fourcc_canonicalize_nvidia_format_mod(__u64 modifier) +{ + if (!(modifier & 0x10) || (modifier & (0xff << 12))) + return modifier; + else + return modifier | (0xfe << 12); +} + +/* + * 16Bx2 Block Linear layout, used by Tegra K1 and later + * + * Pixels are arranged in 64x8 Groups Of Bytes (GOBs). GOBs are then stacked + * vertically by a power of 2 (1 to 32 GOBs) to form a block. + * + * Within a GOB, data is ordered as 16B x 2 lines sectors laid in Z-shape. + * + * Parameter 'v' is the log2 encoding of the number of GOBs stacked vertically. + * Valid values are: + * + * 0 == ONE_GOB + * 1 == TWO_GOBS + * 2 == FOUR_GOBS + * 3 == EIGHT_GOBS + * 4 == SIXTEEN_GOBS + * 5 == THIRTYTWO_GOBS + * + * Chapter 20 "Pixel Memory Formats" of the Tegra X1 TRM describes this format + * in full detail. + */ +#define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(v) \ + DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 0, 0, 0, (v)) + +#define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_ONE_GOB \ + DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(0) +#define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_TWO_GOB \ + DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(1) +#define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_FOUR_GOB \ + DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(2) +#define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_EIGHT_GOB \ + DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(3) +#define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_SIXTEEN_GOB \ + DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(4) +#define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_THIRTYTWO_GOB \ + DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(5) + +/* + * Some Broadcom modifiers take parameters, for example the number of + * vertical lines in the image. Reserve the lower 32 bits for modifier + * type, and the next 24 bits for parameters. Top 8 bits are the + * vendor code. + */ +#define __fourcc_mod_broadcom_param_shift 8 +#define __fourcc_mod_broadcom_param_bits 48 +#define fourcc_mod_broadcom_code(val, params) \ + fourcc_mod_code(BROADCOM, ((((__u64)params) \ + << __fourcc_mod_broadcom_param_shift) | \ + val)) +#define fourcc_mod_broadcom_param(m) \ + ((int)(((m) >> __fourcc_mod_broadcom_param_shift) & \ + ((1ULL << __fourcc_mod_broadcom_param_bits) - 1))) +#define fourcc_mod_broadcom_mod(m) \ + ((m) & ~(((1ULL << __fourcc_mod_broadcom_param_bits) - 1) \ + << __fourcc_mod_broadcom_param_shift)) + +/* + * Broadcom VC4 "T" format + * + * This is the primary layout that the V3D GPU can texture from (it + * can't do linear). The T format has: + * + * - 64b utiles of pixels in a raster-order grid according to cpp. It's 4x4 + * pixels at 32 bit depth. + * + * - 1k subtiles made of a 4x4 raster-order grid of 64b utiles (so usually + * 16x16 pixels). + * + * - 4k tiles made of a 2x2 grid of 1k subtiles (so usually 32x32 pixels). On + * even 4k tile rows, they're arranged as (BL, TL, TR, BR), and on odd rows + * they're (TR, BR, BL, TL), where bottom left is start of memory. + * + * - an image made of 4k tiles in rows either left-to-right (even rows of 4k + * tiles) or right-to-left (odd rows of 4k tiles). + */ +#define DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED fourcc_mod_code(BROADCOM, 1) + +/* + * Broadcom SAND format + * + * This is the native format that the H.264 codec block uses. For VC4 + * HVS, it is only valid for H.264 (NV12/21) and RGBA modes. + * + * The image can be considered to be split into columns, and the + * columns are placed consecutively into memory. The width of those + * columns can be either 32, 64, 128, or 256 pixels, but in practice + * only 128 pixel columns are used. + * + * The pitch between the start of each column is set to optimally + * switch between SDRAM banks. This is passed as the number of lines + * of column width in the modifier (we can't use the stride value due + * to various core checks that look at it , so you should set the + * stride to width*cpp). + * + * Note that the column height for this format modifier is the same + * for all of the planes, assuming that each column contains both Y + * and UV. Some SAND-using hardware stores UV in a separate tiled + * image from Y to reduce the column height, which is not supported + * with these modifiers. + * + * The DRM_FORMAT_MOD_BROADCOM_SAND128_COL_HEIGHT modifier is also + * supported for DRM_FORMAT_P030 where the columns remain as 128 bytes + * wide, but as this is a 10 bpp format that translates to 96 pixels. + */ + +#define DRM_FORMAT_MOD_BROADCOM_SAND32_COL_HEIGHT(v) \ + fourcc_mod_broadcom_code(2, v) +#define DRM_FORMAT_MOD_BROADCOM_SAND64_COL_HEIGHT(v) \ + fourcc_mod_broadcom_code(3, v) +#define DRM_FORMAT_MOD_BROADCOM_SAND128_COL_HEIGHT(v) \ + fourcc_mod_broadcom_code(4, v) +#define DRM_FORMAT_MOD_BROADCOM_SAND256_COL_HEIGHT(v) \ + fourcc_mod_broadcom_code(5, v) + +#define DRM_FORMAT_MOD_BROADCOM_SAND32 \ + DRM_FORMAT_MOD_BROADCOM_SAND32_COL_HEIGHT(0) +#define DRM_FORMAT_MOD_BROADCOM_SAND64 \ + DRM_FORMAT_MOD_BROADCOM_SAND64_COL_HEIGHT(0) +#define DRM_FORMAT_MOD_BROADCOM_SAND128 \ + DRM_FORMAT_MOD_BROADCOM_SAND128_COL_HEIGHT(0) +#define DRM_FORMAT_MOD_BROADCOM_SAND256 \ + DRM_FORMAT_MOD_BROADCOM_SAND256_COL_HEIGHT(0) + +/* Broadcom UIF format + * + * This is the common format for the current Broadcom multimedia + * blocks, including V3D 3.x and newer, newer video codecs, and + * displays. + * + * The image consists of utiles (64b blocks), UIF blocks (2x2 utiles), + * and macroblocks (4x4 UIF blocks). Those 4x4 UIF block groups are + * stored in columns, with padding between the columns to ensure that + * moving from one column to the next doesn't hit the same SDRAM page + * bank. + * + * To calculate the padding, it is assumed that each hardware block + * and the software driving it knows the platform's SDRAM page size, + * number of banks, and XOR address, and that it's identical between + * all blocks using the format. This tiling modifier will use XOR as + * necessary to reduce the padding. If a hardware block can't do XOR, + * the assumption is that a no-XOR tiling modifier will be created. + */ +#define DRM_FORMAT_MOD_BROADCOM_UIF fourcc_mod_code(BROADCOM, 6) + +/* + * Arm Framebuffer Compression (AFBC) modifiers + * + * AFBC is a proprietary lossless image compression protocol and format. + * It provides fine-grained random access and minimizes the amount of data + * transferred between IP blocks. + * + * AFBC has several features which may be supported and/or used, which are + * represented using bits in the modifier. Not all combinations are valid, + * and different devices or use-cases may support different combinations. + * + * Further information on the use of AFBC modifiers can be found in + * Documentation/gpu/afbc.rst + */ + +/* + * The top 4 bits (out of the 56 bits allotted for specifying vendor specific + * modifiers) denote the category for modifiers. Currently we have three + * categories of modifiers ie AFBC, MISC and AFRC. We can have a maximum of + * sixteen different categories. + */ +#define DRM_FORMAT_MOD_ARM_CODE(__type, __val) \ + fourcc_mod_code(ARM, ((__u64)(__type) << 52) | \ + ((__val) & 0x000fffffffffffffULL)) + +#define DRM_FORMAT_MOD_ARM_TYPE_AFBC 0x00 +#define DRM_FORMAT_MOD_ARM_TYPE_MISC 0x01 + +#define DRM_FORMAT_MOD_ARM_AFBC(__afbc_mode) \ + DRM_FORMAT_MOD_ARM_CODE(DRM_FORMAT_MOD_ARM_TYPE_AFBC, __afbc_mode) + +/* + * AFBC superblock size + * + * Indicates the superblock size(s) used for the AFBC buffer. The buffer + * size (in pixels) must be aligned to a multiple of the superblock size. + * Four lowest significant bits(LSBs) are reserved for block size. + * + * Where one superblock size is specified, it applies to all planes of the + * buffer (e.g. 16x16, 32x8). When multiple superblock sizes are specified, + * the first applies to the Luma plane and the second applies to the Chroma + * plane(s). e.g. (32x8_64x4 means 32x8 Luma, with 64x4 Chroma). + * Multiple superblock sizes are only valid for multi-plane YCbCr formats. + */ +#define AFBC_FORMAT_MOD_BLOCK_SIZE_MASK 0xf +#define AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 (1ULL) +#define AFBC_FORMAT_MOD_BLOCK_SIZE_32x8 (2ULL) +#define AFBC_FORMAT_MOD_BLOCK_SIZE_64x4 (3ULL) +#define AFBC_FORMAT_MOD_BLOCK_SIZE_32x8_64x4 (4ULL) + +/* + * AFBC lossless colorspace transform + * + * Indicates that the buffer makes use of the AFBC lossless colorspace + * transform. + */ +#define AFBC_FORMAT_MOD_YTR (1ULL << 4) + +/* + * AFBC block-split + * + * Indicates that the payload of each superblock is split. The second + * half of the payload is positioned at a predefined offset from the start + * of the superblock payload. + */ +#define AFBC_FORMAT_MOD_SPLIT (1ULL << 5) + +/* + * AFBC sparse layout + * + * This flag indicates that the payload of each superblock must be stored at a + * predefined position relative to the other superblocks in the same AFBC + * buffer. This order is the same order used by the header buffer. In this mode + * each superblock is given the same amount of space as an uncompressed + * superblock of the particular format would require, rounding up to the next + * multiple of 128 bytes in size. + */ +#define AFBC_FORMAT_MOD_SPARSE (1ULL << 6) + +/* + * AFBC copy-block restrict + * + * Buffers with this flag must obey the copy-block restriction. The restriction + * is such that there are no copy-blocks referring across the border of 8x8 + * blocks. For the subsampled data the 8x8 limitation is also subsampled. + */ +#define AFBC_FORMAT_MOD_CBR (1ULL << 7) + +/* + * AFBC tiled layout + * + * The tiled layout groups superblocks in 8x8 or 4x4 tiles, where all + * superblocks inside a tile are stored together in memory. 8x8 tiles are used + * for pixel formats up to and including 32 bpp while 4x4 tiles are used for + * larger bpp formats. The order between the tiles is scan line. + * When the tiled layout is used, the buffer size (in pixels) must be aligned + * to the tile size. + */ +#define AFBC_FORMAT_MOD_TILED (1ULL << 8) + +/* + * AFBC solid color blocks + * + * Indicates that the buffer makes use of solid-color blocks, whereby bandwidth + * can be reduced if a whole superblock is a single color. + */ +#define AFBC_FORMAT_MOD_SC (1ULL << 9) + +/* + * AFBC double-buffer + * + * Indicates that the buffer is allocated in a layout safe for front-buffer + * rendering. + */ +#define AFBC_FORMAT_MOD_DB (1ULL << 10) + +/* + * AFBC buffer content hints + * + * Indicates that the buffer includes per-superblock content hints. + */ +#define AFBC_FORMAT_MOD_BCH (1ULL << 11) + +/* AFBC uncompressed storage mode + * + * Indicates that the buffer is using AFBC uncompressed storage mode. + * In this mode all superblock payloads in the buffer use the uncompressed + * storage mode, which is usually only used for data which cannot be compressed. + * The buffer layout is the same as for AFBC buffers without USM set, this only + * affects the storage mode of the individual superblocks. Note that even a + * buffer without USM set may use uncompressed storage mode for some or all + * superblocks, USM just guarantees it for all. + */ +#define AFBC_FORMAT_MOD_USM (1ULL << 12) + +/* + * Arm Fixed-Rate Compression (AFRC) modifiers + * + * AFRC is a proprietary fixed rate image compression protocol and format, + * designed to provide guaranteed bandwidth and memory footprint + * reductions in graphics and media use-cases. + * + * AFRC buffers consist of one or more planes, with the same components + * and meaning as an uncompressed buffer using the same pixel format. + * + * Within each plane, the pixel/luma/chroma values are grouped into + * "coding unit" blocks which are individually compressed to a + * fixed size (in bytes). All coding units within a given plane of a buffer + * store the same number of values, and have the same compressed size. + * + * The coding unit size is configurable, allowing different rates of + * compression. + * + * The start of each AFRC buffer plane must be aligned to an alignment granule + * which depends on the coding unit size. + * + * Coding Unit Size Plane Alignment + * ---------------- --------------- + * 16 bytes 1024 bytes + * 24 bytes 512 bytes + * 32 bytes 2048 bytes + * + * Coding units are grouped into paging tiles. AFRC buffer dimensions must be + * aligned to a multiple of the paging tile dimensions. The dimensions of each + * paging tile depend on whether the buffer is optimised for scanline (SCAN + * layout) or rotated (ROT layout) access. + * + * Layout Paging Tile Width Paging Tile Height + * ------ ----------------- ------------------ + * SCAN 16 coding units 4 coding units + * ROT 8 coding units 8 coding units + * + * The dimensions of each coding unit depend on the number of components + * in the compressed plane and whether the buffer is optimised for + * scanline (SCAN layout) or rotated (ROT layout) access. + * + * Number of Components in Plane Layout Coding Unit Width Coding Unit + * Height + * ----------------------------- --------- ----------------- + * ------------------ 1 SCAN 16 samples 4 + * samples Example: 16x4 luma samples in a 'Y' plane 16x4 chroma 'V' values, in + * the 'V' plane of a fully-planar YUV buffer + * ----------------------------- --------- ----------------- + * ------------------ 1 ROT 8 samples 8 + * samples Example: 8x8 luma samples in a 'Y' plane 8x8 chroma 'V' values, in + * the 'V' plane of a fully-planar YUV buffer + * ----------------------------- --------- ----------------- + * ------------------ 2 DONT CARE 8 samples 4 + * samples Example: 8x4 chroma pairs in the 'UV' plane of a semi-planar YUV + * buffer + * ----------------------------- --------- ----------------- + * ------------------ 3 DONT CARE 4 samples 4 + * samples Example: 4x4 pixels in an RGB buffer without alpha + * ----------------------------- --------- ----------------- + * ------------------ 4 DONT CARE 4 samples 4 + * samples Example: 4x4 pixels in an RGB buffer with alpha + */ + +#define DRM_FORMAT_MOD_ARM_TYPE_AFRC 0x02 + +#define DRM_FORMAT_MOD_ARM_AFRC(__afrc_mode) \ + DRM_FORMAT_MOD_ARM_CODE(DRM_FORMAT_MOD_ARM_TYPE_AFRC, __afrc_mode) + +/* + * AFRC coding unit size modifier. + * + * Indicates the number of bytes used to store each compressed coding unit for + * one or more planes in an AFRC encoded buffer. The coding unit size for + * chrominance is the same for both Cb and Cr, which may be stored in separate + * planes. + * + * AFRC_FORMAT_MOD_CU_SIZE_P0 indicates the number of bytes used to store + * each compressed coding unit in the first plane of the buffer. For RGBA + * buffers this is the only plane, while for semi-planar and fully-planar YUV + * buffers, this corresponds to the luma plane. + * + * AFRC_FORMAT_MOD_CU_SIZE_P12 indicates the number of bytes used to store + * each compressed coding unit in the second and third planes in the buffer. + * For semi-planar and fully-planar YUV buffers, this corresponds to the chroma + * plane(s). + * + * For single-plane buffers, AFRC_FORMAT_MOD_CU_SIZE_P0 must be specified + * and AFRC_FORMAT_MOD_CU_SIZE_P12 must be zero. + * For semi-planar and fully-planar buffers, both AFRC_FORMAT_MOD_CU_SIZE_P0 and + * AFRC_FORMAT_MOD_CU_SIZE_P12 must be specified. + */ +#define AFRC_FORMAT_MOD_CU_SIZE_MASK 0xf +#define AFRC_FORMAT_MOD_CU_SIZE_16 (1ULL) +#define AFRC_FORMAT_MOD_CU_SIZE_24 (2ULL) +#define AFRC_FORMAT_MOD_CU_SIZE_32 (3ULL) + +#define AFRC_FORMAT_MOD_CU_SIZE_P0(__afrc_cu_size) (__afrc_cu_size) +#define AFRC_FORMAT_MOD_CU_SIZE_P12(__afrc_cu_size) ((__afrc_cu_size) << 4) + +/* + * AFRC scanline memory layout. + * + * Indicates if the buffer uses the scanline-optimised layout + * for an AFRC encoded buffer, otherwise, it uses the rotation-optimised layout. + * The memory layout is the same for all planes. + */ +#define AFRC_FORMAT_MOD_LAYOUT_SCAN (1ULL << 8) + +/* + * Arm 16x16 Block U-Interleaved modifier + * + * This is used by Arm Mali Utgard and Midgard GPUs. It divides the image + * into 16x16 pixel blocks. Blocks are stored linearly in order, but pixels + * in the block are reordered. + */ +#define DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED \ + DRM_FORMAT_MOD_ARM_CODE(DRM_FORMAT_MOD_ARM_TYPE_MISC, 1ULL) + +/* + * Allwinner tiled modifier + * + * This tiling mode is implemented by the VPU found on all Allwinner platforms, + * codenamed sunxi. It is associated with a YUV format that uses either 2 or 3 + * planes. + * + * With this tiling, the luminance samples are disposed in tiles representing + * 32x32 pixels and the chrominance samples in tiles representing 32x64 pixels. + * The pixel order in each tile is linear and the tiles are disposed linearly, + * both in row-major order. + */ +#define DRM_FORMAT_MOD_ALLWINNER_TILED fourcc_mod_code(ALLWINNER, 1) + +/* + * Amlogic Video Framebuffer Compression modifiers + * + * Amlogic uses a proprietary lossless image compression protocol and format + * for their hardware video codec accelerators, either video decoders or + * video input encoders. + * + * It considerably reduces memory bandwidth while writing and reading + * frames in memory. + * + * The underlying storage is considered to be 3 components, 8bit or 10-bit + * per component YCbCr 420, single plane : + * - DRM_FORMAT_YUV420_8BIT + * - DRM_FORMAT_YUV420_10BIT + * + * The first 8 bits of the mode defines the layout, then the following 8 bits + * defines the options changing the layout. + * + * Not all combinations are valid, and different SoCs may support different + * combinations of layout and options. + */ +#define __fourcc_mod_amlogic_layout_mask 0xff +#define __fourcc_mod_amlogic_options_shift 8 +#define __fourcc_mod_amlogic_options_mask 0xff + +#define DRM_FORMAT_MOD_AMLOGIC_FBC(__layout, __options) \ + fourcc_mod_code( \ + AMLOGIC, \ + ((__layout) & __fourcc_mod_amlogic_layout_mask) | \ + (((__options) & __fourcc_mod_amlogic_options_mask) \ + << __fourcc_mod_amlogic_options_shift)) + +/* Amlogic FBC Layouts */ + +/* + * Amlogic FBC Basic Layout + * + * The basic layout is composed of: + * - a body content organized in 64x32 superblocks with 4096 bytes per + * superblock in default mode. + * - a 32 bytes per 128x64 header block + * + * This layout is transferrable between Amlogic SoCs supporting this modifier. + */ +#define AMLOGIC_FBC_LAYOUT_BASIC (1ULL) + +/* + * Amlogic FBC Scatter Memory layout + * + * Indicates the header contains IOMMU references to the compressed + * frames content to optimize memory access and layout. + * + * In this mode, only the header memory address is needed, thus the + * content memory organization is tied to the current producer + * execution and cannot be saved/dumped neither transferrable between + * Amlogic SoCs supporting this modifier. + * + * Due to the nature of the layout, these buffers are not expected to + * be accessible by the user-space clients, but only accessible by the + * hardware producers and consumers. + * + * The user-space clients should expect a failure while trying to mmap + * the DMA-BUF handle returned by the producer. + */ +#define AMLOGIC_FBC_LAYOUT_SCATTER (2ULL) + +/* Amlogic FBC Layout Options Bit Mask */ + +/* + * Amlogic FBC Memory Saving mode + * + * Indicates the storage is packed when pixel size is multiple of word + * boundaries, i.e. 8bit should be stored in this mode to save allocation + * memory. + * + * This mode reduces body layout to 3072 bytes per 64x32 superblock with + * the basic layout and 3200 bytes per 64x32 superblock combined with + * the scatter layout. + */ +#define AMLOGIC_FBC_OPTION_MEM_SAVING (1ULL << 0) + +/* MediaTek modifiers + * Bits Parameter Notes + * ----- ------------------------ --------------------------------------------- + * 7: 0 TILE LAYOUT Values are MTK_FMT_MOD_TILE_* + * 15: 8 COMPRESSION Values are MTK_FMT_MOD_COMPRESS_* + * 23:16 10 BIT LAYOUT Values are MTK_FMT_MOD_10BIT_LAYOUT_* + * + */ + +#define DRM_FORMAT_MOD_MTK(__flags) fourcc_mod_code(MTK, __flags) + +/* + * MediaTek Tiled Modifier + * The lowest 8 bits of the modifier is used to specify the tiling + * layout. Only the 16L_32S tiling is used for now, but we define an + * "untiled" version and leave room for future expansion. + */ +#define MTK_FMT_MOD_TILE_MASK 0xf +#define MTK_FMT_MOD_TILE_NONE 0x0 +#define MTK_FMT_MOD_TILE_16L32S 0x1 + +/* + * Bits 8-15 specify compression options + */ +#define MTK_FMT_MOD_COMPRESS_MASK (0xf << 8) +#define MTK_FMT_MOD_COMPRESS_NONE (0x0 << 8) +#define MTK_FMT_MOD_COMPRESS_V1 (0x1 << 8) + +/* + * Bits 16-23 specify how the bits of 10 bit formats are + * stored out in memory + */ +#define MTK_FMT_MOD_10BIT_LAYOUT_MASK (0xf << 16) +#define MTK_FMT_MOD_10BIT_LAYOUT_PACKED (0x0 << 16) +#define MTK_FMT_MOD_10BIT_LAYOUT_LSBTILED (0x1 << 16) +#define MTK_FMT_MOD_10BIT_LAYOUT_LSBRASTER (0x2 << 16) + +/* alias for the most common tiling format */ +#define DRM_FORMAT_MOD_MTK_16L_32S_TILE \ + DRM_FORMAT_MOD_MTK(MTK_FMT_MOD_TILE_16L32S) + +/* + * Apple GPU-tiled layouts. + * + * Apple GPUs support nonlinear tilings with optional lossless compression. + * + * GPU-tiled images are divided into 16KiB tiles: + * + * Bytes per pixel Tile size + * --------------- --------- + * 1 128x128 + * 2 128x64 + * 4 64x64 + * 8 64x32 + * 16 32x32 + * + * Tiles are raster-order. Pixels within a tile are interleaved (Morton order). + * + * Compressed images pad the body to 128-bytes and are immediately followed by a + * metadata section. The metadata section rounds the image dimensions to + * powers-of-two and contains 8 bytes for each 16x16 compression subtile. + * Subtiles are interleaved (Morton order). + * + * All images are 128-byte aligned. + * + * These layouts fundamentally do not have meaningful strides. No matter how we + * specify strides for these layouts, userspace unaware of Apple image layouts + * will be unable to use correctly the specified stride for any purpose. + * Userspace aware of the image layouts do not use strides. The most "correct" + * convention would be setting the image stride to 0. Unfortunately, some + * software assumes the stride is at least (width * bytes per pixel). We + * therefore require that stride equals (width * bytes per pixel). Since the + * stride is arbitrary here, we pick the simplest convention. + * + * Although containing two sections, compressed image layouts are treated in + * software as a single plane. This is modelled after AFBC, a similar + * scheme. Attempting to separate the sections to be "explicit" in DRM would + * only generate more confusion, as software does not treat the image this way. + * + * For detailed information on the hardware image layouts, see + * https://docs.mesa3d.org/drivers/asahi.html#image-layouts + */ +#define DRM_FORMAT_MOD_APPLE_GPU_TILED fourcc_mod_code(APPLE, 1) +#define DRM_FORMAT_MOD_APPLE_GPU_TILED_COMPRESSED fourcc_mod_code(APPLE, 2) + +/* + * AMD modifiers + * + * Memory layout: + * + * without DCC: + * - main surface + * + * with DCC & without DCC_RETILE: + * - main surface in plane 0 + * - DCC surface in plane 1 (RB-aligned, pipe-aligned if DCC_PIPE_ALIGN is + * set) + * + * with DCC & DCC_RETILE: + * - main surface in plane 0 + * - displayable DCC surface in plane 1 (not RB-aligned & not pipe-aligned) + * - pipe-aligned DCC surface in plane 2 (RB-aligned & pipe-aligned) + * + * For multi-plane formats the above surfaces get merged into one plane for + * each format plane, based on the required alignment only. + * + * Bits Parameter Notes + * ----- ------------------------ --------------------------------------------- + * + * 7:0 TILE_VERSION Values are AMD_FMT_MOD_TILE_VER_* + * 12:8 TILE Values are AMD_FMT_MOD_TILE_<version>_* + * 13 DCC + * 14 DCC_RETILE + * 15 DCC_PIPE_ALIGN + * 16 DCC_INDEPENDENT_64B + * 17 DCC_INDEPENDENT_128B + * 19:18 DCC_MAX_COMPRESSED_BLOCK Values are AMD_FMT_MOD_DCC_BLOCK_* + * 20 DCC_CONSTANT_ENCODE + * 23:21 PIPE_XOR_BITS Only for some chips + * 26:24 BANK_XOR_BITS Only for some chips + * 29:27 PACKERS Only for some chips + * 32:30 RB Only for some chips + * 35:33 PIPE Only for some chips + * 55:36 - Reserved for future use, must be zero + */ +#define AMD_FMT_MOD fourcc_mod_code(AMD, 0) + +#define IS_AMD_FMT_MOD(val) (((val) >> 56) == DRM_FORMAT_MOD_VENDOR_AMD) + +/* Reserve 0 for GFX8 and older */ +#define AMD_FMT_MOD_TILE_VER_GFX9 1 +#define AMD_FMT_MOD_TILE_VER_GFX10 2 +#define AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS 3 +#define AMD_FMT_MOD_TILE_VER_GFX11 4 +#define AMD_FMT_MOD_TILE_VER_GFX12 5 + +/* + * 64K_S is the same for GFX9/GFX10/GFX10_RBPLUS and hence has GFX9 as canonical + * version. + */ +#define AMD_FMT_MOD_TILE_GFX9_64K_S 9 + +/* + * 64K_D for non-32 bpp is the same for GFX9/GFX10/GFX10_RBPLUS and hence has + * GFX9 as canonical version. + * + * 64K_D_2D on GFX12 is identical to 64K_D on GFX11. + */ +#define AMD_FMT_MOD_TILE_GFX9_64K_D 10 +#define AMD_FMT_MOD_TILE_GFX9_4K_D_X 22 +#define AMD_FMT_MOD_TILE_GFX9_64K_S_X 25 +#define AMD_FMT_MOD_TILE_GFX9_64K_D_X 26 +#define AMD_FMT_MOD_TILE_GFX9_64K_R_X 27 +#define AMD_FMT_MOD_TILE_GFX11_256K_R_X 31 + +/* Gfx12 swizzle modes: + * 0 - LINEAR + * 1 - 256B_2D - 2D block dimensions + * 2 - 4KB_2D + * 3 - 64KB_2D + * 4 - 256KB_2D + * 5 - 4KB_3D - 3D block dimensions + * 6 - 64KB_3D + * 7 - 256KB_3D + */ +#define AMD_FMT_MOD_TILE_GFX12_256B_2D 1 +#define AMD_FMT_MOD_TILE_GFX12_4K_2D 2 +#define AMD_FMT_MOD_TILE_GFX12_64K_2D 3 +#define AMD_FMT_MOD_TILE_GFX12_256K_2D 4 + +#define AMD_FMT_MOD_DCC_BLOCK_64B 0 +#define AMD_FMT_MOD_DCC_BLOCK_128B 1 +#define AMD_FMT_MOD_DCC_BLOCK_256B 2 + +#define AMD_FMT_MOD_TILE_VERSION_SHIFT 0 +#define AMD_FMT_MOD_TILE_VERSION_MASK 0xFF +#define AMD_FMT_MOD_TILE_SHIFT 8 +#define AMD_FMT_MOD_TILE_MASK 0x1F + +/* Whether DCC compression is enabled. */ +#define AMD_FMT_MOD_DCC_SHIFT 13 +#define AMD_FMT_MOD_DCC_MASK 0x1 + +/* + * Whether to include two DCC surfaces, one which is rb & pipe aligned, and + * one which is not-aligned. + */ +#define AMD_FMT_MOD_DCC_RETILE_SHIFT 14 +#define AMD_FMT_MOD_DCC_RETILE_MASK 0x1 + +/* Only set if DCC_RETILE = false */ +#define AMD_FMT_MOD_DCC_PIPE_ALIGN_SHIFT 15 +#define AMD_FMT_MOD_DCC_PIPE_ALIGN_MASK 0x1 + +#define AMD_FMT_MOD_DCC_INDEPENDENT_64B_SHIFT 16 +#define AMD_FMT_MOD_DCC_INDEPENDENT_64B_MASK 0x1 +#define AMD_FMT_MOD_DCC_INDEPENDENT_128B_SHIFT 17 +#define AMD_FMT_MOD_DCC_INDEPENDENT_128B_MASK 0x1 +#define AMD_FMT_MOD_DCC_MAX_COMPRESSED_BLOCK_SHIFT 18 +#define AMD_FMT_MOD_DCC_MAX_COMPRESSED_BLOCK_MASK 0x3 + +/* + * DCC supports embedding some clear colors directly in the DCC surface. + * However, on older GPUs the rendering HW ignores the embedded clear color + * and prefers the driver provided color. This necessitates doing a fastclear + * eliminate operation before a process transfers control. + * + * If this bit is set that means the fastclear eliminate is not needed for these + * embeddable colors. + */ +#define AMD_FMT_MOD_DCC_CONSTANT_ENCODE_SHIFT 20 +#define AMD_FMT_MOD_DCC_CONSTANT_ENCODE_MASK 0x1 + +/* + * The below fields are for accounting for per GPU differences. These are only + * relevant for GFX9 and later and if the tile field is *_X/_T. + * + * PIPE_XOR_BITS = always needed + * BANK_XOR_BITS = only for TILE_VER_GFX9 + * PACKERS = only for TILE_VER_GFX10_RBPLUS + * RB = only for TILE_VER_GFX9 & DCC + * PIPE = only for TILE_VER_GFX9 & DCC & (DCC_RETILE | DCC_PIPE_ALIGN) + */ +#define AMD_FMT_MOD_PIPE_XOR_BITS_SHIFT 21 +#define AMD_FMT_MOD_PIPE_XOR_BITS_MASK 0x7 +#define AMD_FMT_MOD_BANK_XOR_BITS_SHIFT 24 +#define AMD_FMT_MOD_BANK_XOR_BITS_MASK 0x7 +#define AMD_FMT_MOD_PACKERS_SHIFT 27 +#define AMD_FMT_MOD_PACKERS_MASK 0x7 +#define AMD_FMT_MOD_RB_SHIFT 30 +#define AMD_FMT_MOD_RB_MASK 0x7 +#define AMD_FMT_MOD_PIPE_SHIFT 33 +#define AMD_FMT_MOD_PIPE_MASK 0x7 + +#define AMD_FMT_MOD_SET(field, value) \ + ((__u64)(value) << AMD_FMT_MOD_##field##_SHIFT) +#define AMD_FMT_MOD_GET(field, value) \ + (((value) >> AMD_FMT_MOD_##field##_SHIFT) & AMD_FMT_MOD_##field##_MASK) +#define AMD_FMT_MOD_CLEAR(field) \ + (~((__u64)AMD_FMT_MOD_##field##_MASK << AMD_FMT_MOD_##field##_SHIFT)) + +#if defined(__cplusplus) +} +#endif + +#endif /* DRM_FOURCC_H */ diff --git a/include/arch/x86_64/drm/drm_mode.h b/include/arch/x86_64/drm/drm_mode.h new file mode 100644 index 00000000..8b6da7da --- /dev/null +++ b/include/arch/x86_64/drm/drm_mode.h @@ -0,0 +1,1345 @@ +/* + * Copyright (c) 2007 Dave Airlie <airlied@linux.ie> + * Copyright (c) 2007 Jakob Bornecrantz <wallbraker@gmail.com> + * Copyright (c) 2008 Red Hat Inc. + * Copyright (c) 2007-2008 Tungsten Graphics, Inc., Cedar Park, TX., USA + * Copyright (c) 2007-2008 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef _DRM_MODE_H +#define _DRM_MODE_H + +#include "drm.h" + +#if defined(__cplusplus) +extern "C" { +#endif + +/** + * DOC: overview + * + * DRM exposes many UAPI and structure definitions to have a consistent + * and standardized interface with users. + * Userspace can refer to these structure definitions and UAPI formats + * to communicate to drivers. + */ + +#define DRM_CONNECTOR_NAME_LEN 32 +#define DRM_DISPLAY_MODE_LEN 32 +#define DRM_PROP_NAME_LEN 32 + +#define DRM_MODE_TYPE_BUILTIN (1 << 0) /* deprecated */ +#define DRM_MODE_TYPE_CLOCK_C \ + ((1 << 1) | DRM_MODE_TYPE_BUILTIN) /* deprecated */ +#define DRM_MODE_TYPE_CRTC_C ((1 << 2) | DRM_MODE_TYPE_BUILTIN) /* deprecated \ + */ +#define DRM_MODE_TYPE_PREFERRED (1 << 3) +#define DRM_MODE_TYPE_DEFAULT (1 << 4) /* deprecated */ +#define DRM_MODE_TYPE_USERDEF (1 << 5) +#define DRM_MODE_TYPE_DRIVER (1 << 6) + +#define DRM_MODE_TYPE_ALL \ + (DRM_MODE_TYPE_PREFERRED | DRM_MODE_TYPE_USERDEF | DRM_MODE_TYPE_DRIVER) + +/* Video mode flags */ +/* bit compatible with the xrandr RR_ definitions (bits 0-13) + * + * ABI warning: Existing userspace really expects + * the mode flags to match the xrandr definitions. Any + * changes that don't match the xrandr definitions will + * likely need a new client cap or some other mechanism + * to avoid breaking existing userspace. This includes + * allocating new flags in the previously unused bits! + */ +#define DRM_MODE_FLAG_PHSYNC (1 << 0) +#define DRM_MODE_FLAG_NHSYNC (1 << 1) +#define DRM_MODE_FLAG_PVSYNC (1 << 2) +#define DRM_MODE_FLAG_NVSYNC (1 << 3) +#define DRM_MODE_FLAG_INTERLACE (1 << 4) +#define DRM_MODE_FLAG_DBLSCAN (1 << 5) +#define DRM_MODE_FLAG_CSYNC (1 << 6) +#define DRM_MODE_FLAG_PCSYNC (1 << 7) +#define DRM_MODE_FLAG_NCSYNC (1 << 8) +#define DRM_MODE_FLAG_HSKEW (1 << 9) /* hskew provided */ +#define DRM_MODE_FLAG_BCAST (1 << 10) /* deprecated */ +#define DRM_MODE_FLAG_PIXMUX (1 << 11) /* deprecated */ +#define DRM_MODE_FLAG_DBLCLK (1 << 12) +#define DRM_MODE_FLAG_CLKDIV2 (1 << 13) +/* + * When adding a new stereo mode don't forget to adjust DRM_MODE_FLAGS_3D_MAX + * (define not exposed to user space). + */ +#define DRM_MODE_FLAG_3D_MASK (0x1f << 14) +#define DRM_MODE_FLAG_3D_NONE (0 << 14) +#define DRM_MODE_FLAG_3D_FRAME_PACKING (1 << 14) +#define DRM_MODE_FLAG_3D_FIELD_ALTERNATIVE (2 << 14) +#define DRM_MODE_FLAG_3D_LINE_ALTERNATIVE (3 << 14) +#define DRM_MODE_FLAG_3D_SIDE_BY_SIDE_FULL (4 << 14) +#define DRM_MODE_FLAG_3D_L_DEPTH (5 << 14) +#define DRM_MODE_FLAG_3D_L_DEPTH_GFX_GFX_DEPTH (6 << 14) +#define DRM_MODE_FLAG_3D_TOP_AND_BOTTOM (7 << 14) +#define DRM_MODE_FLAG_3D_SIDE_BY_SIDE_HALF (8 << 14) + +/* Picture aspect ratio options */ +#define DRM_MODE_PICTURE_ASPECT_NONE 0 +#define DRM_MODE_PICTURE_ASPECT_4_3 1 +#define DRM_MODE_PICTURE_ASPECT_16_9 2 +#define DRM_MODE_PICTURE_ASPECT_64_27 3 +#define DRM_MODE_PICTURE_ASPECT_256_135 4 + +/* Content type options */ +#define DRM_MODE_CONTENT_TYPE_NO_DATA 0 +#define DRM_MODE_CONTENT_TYPE_GRAPHICS 1 +#define DRM_MODE_CONTENT_TYPE_PHOTO 2 +#define DRM_MODE_CONTENT_TYPE_CINEMA 3 +#define DRM_MODE_CONTENT_TYPE_GAME 4 + +/* Aspect ratio flag bitmask (4 bits 22:19) */ +#define DRM_MODE_FLAG_PIC_AR_MASK (0x0F << 19) +#define DRM_MODE_FLAG_PIC_AR_NONE (DRM_MODE_PICTURE_ASPECT_NONE << 19) +#define DRM_MODE_FLAG_PIC_AR_4_3 (DRM_MODE_PICTURE_ASPECT_4_3 << 19) +#define DRM_MODE_FLAG_PIC_AR_16_9 (DRM_MODE_PICTURE_ASPECT_16_9 << 19) +#define DRM_MODE_FLAG_PIC_AR_64_27 (DRM_MODE_PICTURE_ASPECT_64_27 << 19) +#define DRM_MODE_FLAG_PIC_AR_256_135 (DRM_MODE_PICTURE_ASPECT_256_135 << 19) + +#define DRM_MODE_FLAG_ALL \ + (DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC | \ + DRM_MODE_FLAG_NVSYNC | DRM_MODE_FLAG_INTERLACE | \ + DRM_MODE_FLAG_DBLSCAN | DRM_MODE_FLAG_CSYNC | DRM_MODE_FLAG_PCSYNC | \ + DRM_MODE_FLAG_NCSYNC | DRM_MODE_FLAG_HSKEW | DRM_MODE_FLAG_DBLCLK | \ + DRM_MODE_FLAG_CLKDIV2 | DRM_MODE_FLAG_3D_MASK) + +/* DPMS flags */ +/* bit compatible with the xorg definitions. */ +#define DRM_MODE_DPMS_ON 0 +#define DRM_MODE_DPMS_STANDBY 1 +#define DRM_MODE_DPMS_SUSPEND 2 +#define DRM_MODE_DPMS_OFF 3 + +/* Scaling mode options */ +#define DRM_MODE_SCALE_NONE \ + 0 /* Unmodified timing (display or \ + software can still scale) */ +#define DRM_MODE_SCALE_FULLSCREEN 1 /* Full screen, ignore aspect */ +#define DRM_MODE_SCALE_CENTER 2 /* Centered, no scaling */ +#define DRM_MODE_SCALE_ASPECT 3 /* Full screen, preserve aspect */ + +/* Dithering mode options */ +#define DRM_MODE_DITHERING_OFF 0 +#define DRM_MODE_DITHERING_ON 1 +#define DRM_MODE_DITHERING_AUTO 2 + +/* Dirty info options */ +#define DRM_MODE_DIRTY_OFF 0 +#define DRM_MODE_DIRTY_ON 1 +#define DRM_MODE_DIRTY_ANNOTATE 2 + +/* Link Status options */ +#define DRM_MODE_LINK_STATUS_GOOD 0 +#define DRM_MODE_LINK_STATUS_BAD 1 + +/* + * DRM_MODE_ROTATE_<degrees> + * + * Signals that a drm plane is been rotated <degrees> degrees in counter + * clockwise direction. + * + * This define is provided as a convenience, looking up the property id + * using the name->prop id lookup is the preferred method. + */ +#define DRM_MODE_ROTATE_0 (1 << 0) +#define DRM_MODE_ROTATE_90 (1 << 1) +#define DRM_MODE_ROTATE_180 (1 << 2) +#define DRM_MODE_ROTATE_270 (1 << 3) + +/* + * DRM_MODE_ROTATE_MASK + * + * Bitmask used to look for drm plane rotations. + */ +#define DRM_MODE_ROTATE_MASK \ + (DRM_MODE_ROTATE_0 | DRM_MODE_ROTATE_90 | DRM_MODE_ROTATE_180 | \ + DRM_MODE_ROTATE_270) + +/* + * DRM_MODE_REFLECT_<axis> + * + * Signals that the contents of a drm plane is reflected along the <axis> axis, + * in the same way as mirroring. + * See kerneldoc chapter "Plane Composition Properties" for more details. + * + * This define is provided as a convenience, looking up the property id + * using the name->prop id lookup is the preferred method. + */ +#define DRM_MODE_REFLECT_X (1 << 4) +#define DRM_MODE_REFLECT_Y (1 << 5) + +/* + * DRM_MODE_REFLECT_MASK + * + * Bitmask used to look for drm plane reflections. + */ +#define DRM_MODE_REFLECT_MASK (DRM_MODE_REFLECT_X | DRM_MODE_REFLECT_Y) + +/* Content Protection Flags */ +#define DRM_MODE_CONTENT_PROTECTION_UNDESIRED 0 +#define DRM_MODE_CONTENT_PROTECTION_DESIRED 1 +#define DRM_MODE_CONTENT_PROTECTION_ENABLED 2 + +/** + * struct drm_mode_modeinfo - Display mode information. + * @clock: pixel clock in kHz + * @hdisplay: horizontal display size + * @hsync_start: horizontal sync start + * @hsync_end: horizontal sync end + * @htotal: horizontal total size + * @hskew: horizontal skew + * @vdisplay: vertical display size + * @vsync_start: vertical sync start + * @vsync_end: vertical sync end + * @vtotal: vertical total size + * @vscan: vertical scan + * @vrefresh: approximate vertical refresh rate in Hz + * @flags: bitmask of misc. flags, see DRM_MODE_FLAG_* defines + * @type: bitmask of type flags, see DRM_MODE_TYPE_* defines + * @name: string describing the mode resolution + * + * This is the user-space API display mode information structure. For the + * kernel version see struct drm_display_mode. + */ +struct drm_mode_modeinfo { + __u32 clock; + __u16 hdisplay; + __u16 hsync_start; + __u16 hsync_end; + __u16 htotal; + __u16 hskew; + __u16 vdisplay; + __u16 vsync_start; + __u16 vsync_end; + __u16 vtotal; + __u16 vscan; + + __u32 vrefresh; + + __u32 flags; + __u32 type; + char name[DRM_DISPLAY_MODE_LEN]; +}; + +struct drm_mode_card_res { + __u64 fb_id_ptr; + __u64 crtc_id_ptr; + __u64 connector_id_ptr; + __u64 encoder_id_ptr; + __u32 count_fbs; + __u32 count_crtcs; + __u32 count_connectors; + __u32 count_encoders; + __u32 min_width; + __u32 max_width; + __u32 min_height; + __u32 max_height; +}; + +struct drm_mode_crtc { + __u64 set_connectors_ptr; + __u32 count_connectors; + + __u32 crtc_id; /**< Id */ + __u32 fb_id; /**< Id of framebuffer */ + + __u32 x; /**< x Position on the framebuffer */ + __u32 y; /**< y Position on the framebuffer */ + + __u32 gamma_size; + __u32 mode_valid; + struct drm_mode_modeinfo mode; +}; + +#define DRM_MODE_PRESENT_TOP_FIELD (1 << 0) +#define DRM_MODE_PRESENT_BOTTOM_FIELD (1 << 1) + +/* Planes blend with or override other bits on the CRTC */ +struct drm_mode_set_plane { + __u32 plane_id; + __u32 crtc_id; + __u32 fb_id; /* fb object contains surface format type */ + __u32 flags; /* see above flags */ + + /* Signed dest location allows it to be partially off screen */ + __s32 crtc_x; + __s32 crtc_y; + __u32 crtc_w; + __u32 crtc_h; + + /* Source values are 16.16 fixed point */ + __u32 src_x; + __u32 src_y; + __u32 src_h; + __u32 src_w; +}; + +/** + * struct drm_mode_get_plane - Get plane metadata. + * + * Userspace can perform a GETPLANE ioctl to retrieve information about a + * plane. + * + * To retrieve the number of formats supported, set @count_format_types to zero + * and call the ioctl. @count_format_types will be updated with the value. + * + * To retrieve these formats, allocate an array with the memory needed to store + * @count_format_types formats. Point @format_type_ptr to this array and call + * the ioctl again (with @count_format_types still set to the value returned in + * the first ioctl call). + */ +struct drm_mode_get_plane { + /** + * @plane_id: Object ID of the plane whose information should be + * retrieved. Set by caller. + */ + __u32 plane_id; + + /** @crtc_id: Object ID of the current CRTC. */ + __u32 crtc_id; + /** @fb_id: Object ID of the current fb. */ + __u32 fb_id; + + /** + * @possible_crtcs: Bitmask of CRTC's compatible with the plane. CRTC's + * are created and they receive an index, which corresponds to their + * position in the bitmask. Bit N corresponds to + * :ref:`CRTC index<crtc_index>` N. + */ + __u32 possible_crtcs; + /** @gamma_size: Never used. */ + __u32 gamma_size; + + /** @count_format_types: Number of formats. */ + __u32 count_format_types; + /** + * @format_type_ptr: Pointer to ``__u32`` array of formats that are + * supported by the plane. These formats do not require modifiers. + */ + __u64 format_type_ptr; +}; + +struct drm_mode_get_plane_res { + __u64 plane_id_ptr; + __u32 count_planes; +}; + +#define DRM_MODE_ENCODER_NONE 0 +#define DRM_MODE_ENCODER_DAC 1 +#define DRM_MODE_ENCODER_TMDS 2 +#define DRM_MODE_ENCODER_LVDS 3 +#define DRM_MODE_ENCODER_TVDAC 4 +#define DRM_MODE_ENCODER_VIRTUAL 5 +#define DRM_MODE_ENCODER_DSI 6 +#define DRM_MODE_ENCODER_DPMST 7 +#define DRM_MODE_ENCODER_DPI 8 + +struct drm_mode_get_encoder { + __u32 encoder_id; + __u32 encoder_type; + + __u32 crtc_id; /**< Id of crtc */ + + __u32 possible_crtcs; + __u32 possible_clones; +}; + +/* This is for connectors with multiple signal types. */ +/* Try to match DRM_MODE_CONNECTOR_X as closely as possible. */ +enum drm_mode_subconnector { + DRM_MODE_SUBCONNECTOR_Automatic = 0, /* DVI-I, TV */ + DRM_MODE_SUBCONNECTOR_Unknown = 0, /* DVI-I, TV, DP */ + DRM_MODE_SUBCONNECTOR_VGA = 1, /* DP */ + DRM_MODE_SUBCONNECTOR_DVID = 3, /* DVI-I DP */ + DRM_MODE_SUBCONNECTOR_DVIA = 4, /* DVI-I */ + DRM_MODE_SUBCONNECTOR_Composite = 5, /* TV */ + DRM_MODE_SUBCONNECTOR_SVIDEO = 6, /* TV */ + DRM_MODE_SUBCONNECTOR_Component = 8, /* TV */ + DRM_MODE_SUBCONNECTOR_SCART = 9, /* TV */ + DRM_MODE_SUBCONNECTOR_DisplayPort = 10, /* DP */ + DRM_MODE_SUBCONNECTOR_HDMIA = 11, /* DP */ + DRM_MODE_SUBCONNECTOR_Native = 15, /* DP */ + DRM_MODE_SUBCONNECTOR_Wireless = 18, /* DP */ +}; + +#define DRM_MODE_CONNECTOR_Unknown 0 +#define DRM_MODE_CONNECTOR_VGA 1 +#define DRM_MODE_CONNECTOR_DVII 2 +#define DRM_MODE_CONNECTOR_DVID 3 +#define DRM_MODE_CONNECTOR_DVIA 4 +#define DRM_MODE_CONNECTOR_Composite 5 +#define DRM_MODE_CONNECTOR_SVIDEO 6 +#define DRM_MODE_CONNECTOR_LVDS 7 +#define DRM_MODE_CONNECTOR_Component 8 +#define DRM_MODE_CONNECTOR_9PinDIN 9 +#define DRM_MODE_CONNECTOR_DisplayPort 10 +#define DRM_MODE_CONNECTOR_HDMIA 11 +#define DRM_MODE_CONNECTOR_HDMIB 12 +#define DRM_MODE_CONNECTOR_TV 13 +#define DRM_MODE_CONNECTOR_eDP 14 +#define DRM_MODE_CONNECTOR_VIRTUAL 15 +#define DRM_MODE_CONNECTOR_DSI 16 +#define DRM_MODE_CONNECTOR_DPI 17 +#define DRM_MODE_CONNECTOR_WRITEBACK 18 +#define DRM_MODE_CONNECTOR_SPI 19 +#define DRM_MODE_CONNECTOR_USB 20 + +/** + * struct drm_mode_get_connector - Get connector metadata. + * + * User-space can perform a GETCONNECTOR ioctl to retrieve information about a + * connector. User-space is expected to retrieve encoders, modes and properties + * by performing this ioctl at least twice: the first time to retrieve the + * number of elements, the second time to retrieve the elements themselves. + * + * To retrieve the number of elements, set @count_props and @count_encoders to + * zero, set @count_modes to 1, and set @modes_ptr to a temporary struct + * drm_mode_modeinfo element. + * + * To retrieve the elements, allocate arrays for @encoders_ptr, @modes_ptr, + * @props_ptr and @prop_values_ptr, then set @count_modes, @count_props and + * @count_encoders to their capacity. + * + * Performing the ioctl only twice may be racy: the number of elements may have + * changed with a hotplug event in-between the two ioctls. User-space is + * expected to retry the last ioctl until the number of elements stabilizes. + * The kernel won't fill any array which doesn't have the expected length. + * + * **Force-probing a connector** + * + * If the @count_modes field is set to zero and the DRM client is the current + * DRM master, the kernel will perform a forced probe on the connector to + * refresh the connector status, modes and EDID. A forced-probe can be slow, + * might cause flickering and the ioctl will block. + * + * User-space needs to force-probe connectors to ensure their metadata is + * up-to-date at startup and after receiving a hot-plug event. User-space + * may perform a forced-probe when the user explicitly requests it. User-space + * shouldn't perform a forced-probe in other situations. + */ +struct drm_mode_get_connector { + /** @encoders_ptr: Pointer to ``__u32`` array of object IDs. */ + __u64 encoders_ptr; + /** @modes_ptr: Pointer to struct drm_mode_modeinfo array. */ + __u64 modes_ptr; + /** @props_ptr: Pointer to ``__u32`` array of property IDs. */ + __u64 props_ptr; + /** @prop_values_ptr: Pointer to ``__u64`` array of property values. */ + __u64 prop_values_ptr; + + /** @count_modes: Number of modes. */ + __u32 count_modes; + /** @count_props: Number of properties. */ + __u32 count_props; + /** @count_encoders: Number of encoders. */ + __u32 count_encoders; + + /** @encoder_id: Object ID of the current encoder. */ + __u32 encoder_id; + /** @connector_id: Object ID of the connector. */ + __u32 connector_id; + /** + * @connector_type: Type of the connector. + * + * See DRM_MODE_CONNECTOR_* defines. + */ + __u32 connector_type; + /** + * @connector_type_id: Type-specific connector number. + * + * This is not an object ID. This is a per-type connector number. Each + * (type, type_id) combination is unique across all connectors of a DRM + * device. + * + * The (type, type_id) combination is not a stable identifier: the + * type_id can change depending on the driver probe order. + */ + __u32 connector_type_id; + + /** + * @connection: Status of the connector. + * + * See enum drm_connector_status. + */ + __u32 connection; + /** @mm_width: Width of the connected sink in millimeters. */ + __u32 mm_width; + /** @mm_height: Height of the connected sink in millimeters. */ + __u32 mm_height; + /** + * @subpixel: Subpixel order of the connected sink. + * + * See enum subpixel_order. + */ + __u32 subpixel; + + /** @pad: Padding, must be zero. */ + __u32 pad; +}; + +#define DRM_MODE_PROP_PENDING (1 << 0) /* deprecated, do not use */ +#define DRM_MODE_PROP_RANGE (1 << 1) +#define DRM_MODE_PROP_IMMUTABLE (1 << 2) +#define DRM_MODE_PROP_ENUM (1 << 3) /* enumerated type with text strings */ +#define DRM_MODE_PROP_BLOB (1 << 4) +#define DRM_MODE_PROP_BITMASK (1 << 5) /* bitmask of enumerated types */ + +/* non-extended types: legacy bitmask, one bit per type: */ +#define DRM_MODE_PROP_LEGACY_TYPE \ + (DRM_MODE_PROP_RANGE | DRM_MODE_PROP_ENUM | DRM_MODE_PROP_BLOB | \ + DRM_MODE_PROP_BITMASK) + +/* extended-types: rather than continue to consume a bit per type, + * grab a chunk of the bits to use as integer type id. + */ +#define DRM_MODE_PROP_EXTENDED_TYPE 0x0000ffc0 +#define DRM_MODE_PROP_TYPE(n) ((n) << 6) +#define DRM_MODE_PROP_OBJECT DRM_MODE_PROP_TYPE(1) +#define DRM_MODE_PROP_SIGNED_RANGE DRM_MODE_PROP_TYPE(2) + +/* the PROP_ATOMIC flag is used to hide properties from userspace that + * is not aware of atomic properties. This is mostly to work around + * older userspace (DDX drivers) that read/write each prop they find, + * without being aware that this could be triggering a lengthy modeset. + */ +#define DRM_MODE_PROP_ATOMIC 0x80000000 + +/** + * struct drm_mode_property_enum - Description for an enum/bitfield entry. + * @value: numeric value for this enum entry. + * @name: symbolic name for this enum entry. + * + * See struct drm_property_enum for details. + */ +struct drm_mode_property_enum { + __u64 value; + char name[DRM_PROP_NAME_LEN]; +}; + +/** + * struct drm_mode_get_property - Get property metadata. + * + * User-space can perform a GETPROPERTY ioctl to retrieve information about a + * property. The same property may be attached to multiple objects, see + * "Modeset Base Object Abstraction". + * + * The meaning of the @values_ptr field changes depending on the property type. + * See &drm_property.flags for more details. + * + * The @enum_blob_ptr and @count_enum_blobs fields are only meaningful when the + * property has the type &DRM_MODE_PROP_ENUM or &DRM_MODE_PROP_BITMASK. For + * backwards compatibility, the kernel will always set @count_enum_blobs to + * zero when the property has the type &DRM_MODE_PROP_BLOB. User-space must + * ignore these two fields if the property has a different type. + * + * User-space is expected to retrieve values and enums by performing this ioctl + * at least twice: the first time to retrieve the number of elements, the + * second time to retrieve the elements themselves. + * + * To retrieve the number of elements, set @count_values and @count_enum_blobs + * to zero, then call the ioctl. @count_values will be updated with the number + * of elements. If the property has the type &DRM_MODE_PROP_ENUM or + * &DRM_MODE_PROP_BITMASK, @count_enum_blobs will be updated as well. + * + * To retrieve the elements themselves, allocate an array for @values_ptr and + * set @count_values to its capacity. If the property has the type + * &DRM_MODE_PROP_ENUM or &DRM_MODE_PROP_BITMASK, allocate an array for + * @enum_blob_ptr and set @count_enum_blobs to its capacity. Calling the ioctl + * again will fill the arrays. + */ +struct drm_mode_get_property { + /** @values_ptr: Pointer to a ``__u64`` array. */ + __u64 values_ptr; + /** @enum_blob_ptr: Pointer to a struct drm_mode_property_enum array. */ + __u64 enum_blob_ptr; + + /** + * @prop_id: Object ID of the property which should be retrieved. Set + * by the caller. + */ + __u32 prop_id; + /** + * @flags: ``DRM_MODE_PROP_*`` bitfield. See &drm_property.flags for + * a definition of the flags. + */ + __u32 flags; + /** + * @name: Symbolic property name. User-space should use this field to + * recognize properties. + */ + char name[DRM_PROP_NAME_LEN]; + + /** @count_values: Number of elements in @values_ptr. */ + __u32 count_values; + /** @count_enum_blobs: Number of elements in @enum_blob_ptr. */ + __u32 count_enum_blobs; +}; + +struct drm_mode_connector_set_property { + __u64 value; + __u32 prop_id; + __u32 connector_id; +}; + +#define DRM_MODE_OBJECT_CRTC 0xcccccccc +#define DRM_MODE_OBJECT_CONNECTOR 0xc0c0c0c0 +#define DRM_MODE_OBJECT_ENCODER 0xe0e0e0e0 +#define DRM_MODE_OBJECT_MODE 0xdededede +#define DRM_MODE_OBJECT_PROPERTY 0xb0b0b0b0 +#define DRM_MODE_OBJECT_FB 0xfbfbfbfb +#define DRM_MODE_OBJECT_BLOB 0xbbbbbbbb +#define DRM_MODE_OBJECT_PLANE 0xeeeeeeee +#define DRM_MODE_OBJECT_ANY 0 + +struct drm_mode_obj_get_properties { + __u64 props_ptr; + __u64 prop_values_ptr; + __u32 count_props; + __u32 obj_id; + __u32 obj_type; +}; + +struct drm_mode_obj_set_property { + __u64 value; + __u32 prop_id; + __u32 obj_id; + __u32 obj_type; +}; + +struct drm_mode_get_blob { + __u32 blob_id; + __u32 length; + __u64 data; +}; + +struct drm_mode_fb_cmd { + __u32 fb_id; + __u32 width; + __u32 height; + __u32 pitch; + __u32 bpp; + __u32 depth; + /* driver specific handle */ + __u32 handle; +}; + +#define DRM_MODE_FB_INTERLACED (1 << 0) /* for interlaced framebuffers */ +#define DRM_MODE_FB_MODIFIERS (1 << 1) /* enables ->modifier[] */ + +/** + * struct drm_mode_fb_cmd2 - Frame-buffer metadata. + * + * This struct holds frame-buffer metadata. There are two ways to use it: + * + * - User-space can fill this struct and perform a &DRM_IOCTL_MODE_ADDFB2 + * ioctl to register a new frame-buffer. The new frame-buffer object ID will + * be set by the kernel in @fb_id. + * - User-space can set @fb_id and perform a &DRM_IOCTL_MODE_GETFB2 ioctl to + * fetch metadata about an existing frame-buffer. + * + * In case of planar formats, this struct allows up to 4 buffer objects with + * offsets and pitches per plane. The pitch and offset order are dictated by + * the format FourCC as defined by ``drm_fourcc.h``, e.g. NV12 is described as: + * + * YUV 4:2:0 image with a plane of 8-bit Y samples followed by an + * interleaved U/V plane containing 8-bit 2x2 subsampled colour difference + * samples. + * + * So it would consist of a Y plane at ``offsets[0]`` and a UV plane at + * ``offsets[1]``. + * + * To accommodate tiled, compressed, etc formats, a modifier can be specified. + * For more information see the "Format Modifiers" section. Note that even + * though it looks like we have a modifier per-plane, we in fact do not. The + * modifier for each plane must be identical. Thus all combinations of + * different data layouts for multi-plane formats must be enumerated as + * separate modifiers. + * + * All of the entries in @handles, @pitches, @offsets and @modifier must be + * zero when unused. Warning, for @offsets and @modifier zero can't be used to + * figure out whether the entry is used or not since it's a valid value (a zero + * offset is common, and a zero modifier is &DRM_FORMAT_MOD_LINEAR). + */ +struct drm_mode_fb_cmd2 { + /** @fb_id: Object ID of the frame-buffer. */ + __u32 fb_id; + /** @width: Width of the frame-buffer. */ + __u32 width; + /** @height: Height of the frame-buffer. */ + __u32 height; + /** + * @pixel_format: FourCC format code, see ``DRM_FORMAT_*`` constants in + * ``drm_fourcc.h``. + */ + __u32 pixel_format; + /** + * @flags: Frame-buffer flags (see &DRM_MODE_FB_INTERLACED and + * &DRM_MODE_FB_MODIFIERS). + */ + __u32 flags; + + /** + * @handles: GEM buffer handle, one per plane. Set to 0 if the plane is + * unused. The same handle can be used for multiple planes. + */ + __u32 handles[4]; + /** @pitches: Pitch (aka. stride) in bytes, one per plane. */ + __u32 pitches[4]; + /** @offsets: Offset into the buffer in bytes, one per plane. */ + __u32 offsets[4]; + /** + * @modifier: Format modifier, one per plane. See ``DRM_FORMAT_MOD_*`` + * constants in ``drm_fourcc.h``. All planes must use the same + * modifier. Ignored unless &DRM_MODE_FB_MODIFIERS is set in @flags. + */ + __u64 modifier[4]; +}; + +#define DRM_MODE_FB_DIRTY_ANNOTATE_COPY 0x01 +#define DRM_MODE_FB_DIRTY_ANNOTATE_FILL 0x02 +#define DRM_MODE_FB_DIRTY_FLAGS 0x03 + +#define DRM_MODE_FB_DIRTY_MAX_CLIPS 256 + +/* + * Mark a region of a framebuffer as dirty. + * + * Some hardware does not automatically update display contents + * as a hardware or software draw to a framebuffer. This ioctl + * allows userspace to tell the kernel and the hardware what + * regions of the framebuffer have changed. + * + * The kernel or hardware is free to update more then just the + * region specified by the clip rects. The kernel or hardware + * may also delay and/or coalesce several calls to dirty into a + * single update. + * + * Userspace may annotate the updates, the annotates are a + * promise made by the caller that the change is either a copy + * of pixels or a fill of a single color in the region specified. + * + * If the DRM_MODE_FB_DIRTY_ANNOTATE_COPY flag is given then + * the number of updated regions are half of num_clips given, + * where the clip rects are paired in src and dst. The width and + * height of each one of the pairs must match. + * + * If the DRM_MODE_FB_DIRTY_ANNOTATE_FILL flag is given the caller + * promises that the region specified of the clip rects is filled + * completely with a single color as given in the color argument. + */ + +struct drm_mode_fb_dirty_cmd { + __u32 fb_id; + __u32 flags; + __u32 color; + __u32 num_clips; + __u64 clips_ptr; +}; + +struct drm_mode_mode_cmd { + __u32 connector_id; + struct drm_mode_modeinfo mode; +}; + +#define DRM_MODE_CURSOR_BO 0x01 +#define DRM_MODE_CURSOR_MOVE 0x02 +#define DRM_MODE_CURSOR_FLAGS 0x03 + +/* + * depending on the value in flags different members are used. + * + * CURSOR_BO uses + * crtc_id + * width + * height + * handle - if 0 turns the cursor off + * + * CURSOR_MOVE uses + * crtc_id + * x + * y + */ +struct drm_mode_cursor { + __u32 flags; + __u32 crtc_id; + __s32 x; + __s32 y; + __u32 width; + __u32 height; + /* driver specific handle */ + __u32 handle; +}; + +struct drm_mode_cursor2 { + __u32 flags; + __u32 crtc_id; + __s32 x; + __s32 y; + __u32 width; + __u32 height; + /* driver specific handle */ + __u32 handle; + __s32 hot_x; + __s32 hot_y; +}; + +struct drm_mode_crtc_lut { + __u32 crtc_id; + __u32 gamma_size; + + /* pointers to arrays */ + __u64 red; + __u64 green; + __u64 blue; +}; + +struct drm_color_ctm { + /* + * Conversion matrix in S31.32 sign-magnitude + * (not two's complement!) format. + * + * out matrix in + * |R| |0 1 2| |R| + * |G| = |3 4 5| x |G| + * |B| |6 7 8| |B| + */ + __u64 matrix[9]; +}; + +struct drm_color_lut { + /* + * Values are mapped linearly to 0.0 - 1.0 range, with 0x0 == 0.0 and + * 0xffff == 1.0. + */ + __u16 red; + __u16 green; + __u16 blue; + __u16 reserved; +}; + +/** + * struct drm_plane_size_hint - Plane size hints + * @width: The width of the plane in pixel + * @height: The height of the plane in pixel + * + * The plane SIZE_HINTS property blob contains an + * array of struct drm_plane_size_hint. + */ +struct drm_plane_size_hint { + __u16 width; + __u16 height; +}; + +/** + * struct hdr_metadata_infoframe - HDR Metadata Infoframe Data. + * + * HDR Metadata Infoframe as per CTA 861.G spec. This is expected + * to match exactly with the spec. + * + * Userspace is expected to pass the metadata information as per + * the format described in this structure. + */ +struct hdr_metadata_infoframe { + /** + * @eotf: Electro-Optical Transfer Function (EOTF) + * used in the stream. + */ + __u8 eotf; + /** + * @metadata_type: Static_Metadata_Descriptor_ID. + */ + __u8 metadata_type; + /** + * @display_primaries: Color Primaries of the Data. + * These are coded as unsigned 16-bit values in units of + * 0.00002, where 0x0000 represents zero and 0xC350 + * represents 1.0000. + * @display_primaries.x: X coordinate of color primary. + * @display_primaries.y: Y coordinate of color primary. + */ + struct { + __u16 x, y; + } display_primaries[3]; + /** + * @white_point: White Point of Colorspace Data. + * These are coded as unsigned 16-bit values in units of + * 0.00002, where 0x0000 represents zero and 0xC350 + * represents 1.0000. + * @white_point.x: X coordinate of whitepoint of color primary. + * @white_point.y: Y coordinate of whitepoint of color primary. + */ + struct { + __u16 x, y; + } white_point; + /** + * @max_display_mastering_luminance: Max Mastering Display Luminance. + * This value is coded as an unsigned 16-bit value in units of 1 cd/m2, + * where 0x0001 represents 1 cd/m2 and 0xFFFF represents 65535 cd/m2. + */ + __u16 max_display_mastering_luminance; + /** + * @min_display_mastering_luminance: Min Mastering Display Luminance. + * This value is coded as an unsigned 16-bit value in units of + * 0.0001 cd/m2, where 0x0001 represents 0.0001 cd/m2 and 0xFFFF + * represents 6.5535 cd/m2. + */ + __u16 min_display_mastering_luminance; + /** + * @max_cll: Max Content Light Level. + * This value is coded as an unsigned 16-bit value in units of 1 cd/m2, + * where 0x0001 represents 1 cd/m2 and 0xFFFF represents 65535 cd/m2. + */ + __u16 max_cll; + /** + * @max_fall: Max Frame Average Light Level. + * This value is coded as an unsigned 16-bit value in units of 1 cd/m2, + * where 0x0001 represents 1 cd/m2 and 0xFFFF represents 65535 cd/m2. + */ + __u16 max_fall; +}; + +/** + * struct hdr_output_metadata - HDR output metadata + * + * Metadata Information to be passed from userspace + */ +struct hdr_output_metadata { + /** + * @metadata_type: Static_Metadata_Descriptor_ID. + */ + __u32 metadata_type; + /** + * @hdmi_metadata_type1: HDR Metadata Infoframe. + */ + union { + struct hdr_metadata_infoframe hdmi_metadata_type1; + }; +}; + +/** + * DRM_MODE_PAGE_FLIP_EVENT + * + * Request that the kernel sends back a vblank event (see + * struct drm_event_vblank) with the &DRM_EVENT_FLIP_COMPLETE type when the + * page-flip is done. + */ +#define DRM_MODE_PAGE_FLIP_EVENT 0x01 +/** + * DRM_MODE_PAGE_FLIP_ASYNC + * + * Request that the page-flip is performed as soon as possible, ie. with no + * delay due to waiting for vblank. This may cause tearing to be visible on + * the screen. + * + * When used with atomic uAPI, the driver will return an error if the hardware + * doesn't support performing an asynchronous page-flip for this update. + * User-space should handle this, e.g. by falling back to a regular page-flip. + * + * Note, some hardware might need to perform one last synchronous page-flip + * before being able to switch to asynchronous page-flips. As an exception, + * the driver will return success even though that first page-flip is not + * asynchronous. + */ +#define DRM_MODE_PAGE_FLIP_ASYNC 0x02 +#define DRM_MODE_PAGE_FLIP_TARGET_ABSOLUTE 0x4 +#define DRM_MODE_PAGE_FLIP_TARGET_RELATIVE 0x8 +#define DRM_MODE_PAGE_FLIP_TARGET \ + (DRM_MODE_PAGE_FLIP_TARGET_ABSOLUTE | \ + DRM_MODE_PAGE_FLIP_TARGET_RELATIVE) +/** + * DRM_MODE_PAGE_FLIP_FLAGS + * + * Bitmask of flags suitable for &drm_mode_crtc_page_flip_target.flags. + */ +#define DRM_MODE_PAGE_FLIP_FLAGS \ + (DRM_MODE_PAGE_FLIP_EVENT | DRM_MODE_PAGE_FLIP_ASYNC | \ + DRM_MODE_PAGE_FLIP_TARGET) + +/* + * Request a page flip on the specified crtc. + * + * This ioctl will ask KMS to schedule a page flip for the specified + * crtc. Once any pending rendering targeting the specified fb (as of + * ioctl time) has completed, the crtc will be reprogrammed to display + * that fb after the next vertical refresh. The ioctl returns + * immediately, but subsequent rendering to the current fb will block + * in the execbuffer ioctl until the page flip happens. If a page + * flip is already pending as the ioctl is called, EBUSY will be + * returned. + * + * Flag DRM_MODE_PAGE_FLIP_EVENT requests that drm sends back a vblank + * event (see drm.h: struct drm_event_vblank) when the page flip is + * done. The user_data field passed in with this ioctl will be + * returned as the user_data field in the vblank event struct. + * + * Flag DRM_MODE_PAGE_FLIP_ASYNC requests that the flip happen + * 'as soon as possible', meaning that it not delay waiting for vblank. + * This may cause tearing on the screen. + * + * The reserved field must be zero. + */ + +struct drm_mode_crtc_page_flip { + __u32 crtc_id; + __u32 fb_id; + __u32 flags; + __u32 reserved; + __u64 user_data; +}; + +/* + * Request a page flip on the specified crtc. + * + * Same as struct drm_mode_crtc_page_flip, but supports new flags and + * re-purposes the reserved field: + * + * The sequence field must be zero unless either of the + * DRM_MODE_PAGE_FLIP_TARGET_ABSOLUTE/RELATIVE flags is specified. When + * the ABSOLUTE flag is specified, the sequence field denotes the absolute + * vblank sequence when the flip should take effect. When the RELATIVE + * flag is specified, the sequence field denotes the relative (to the + * current one when the ioctl is called) vblank sequence when the flip + * should take effect. NOTE: DRM_IOCTL_WAIT_VBLANK must still be used to + * make sure the vblank sequence before the target one has passed before + * calling this ioctl. The purpose of the + * DRM_MODE_PAGE_FLIP_TARGET_ABSOLUTE/RELATIVE flags is merely to clarify + * the target for when code dealing with a page flip runs during a + * vertical blank period. + */ + +struct drm_mode_crtc_page_flip_target { + __u32 crtc_id; + __u32 fb_id; + __u32 flags; + __u32 sequence; + __u64 user_data; +}; + +/** + * struct drm_mode_create_dumb - Create a KMS dumb buffer for scanout. + * @height: buffer height in pixels + * @width: buffer width in pixels + * @bpp: bits per pixel + * @flags: must be zero + * @handle: buffer object handle + * @pitch: number of bytes between two consecutive lines + * @size: size of the whole buffer in bytes + * + * User-space fills @height, @width, @bpp and @flags. If the IOCTL succeeds, + * the kernel fills @handle, @pitch and @size. + */ +struct drm_mode_create_dumb { + __u32 height; + __u32 width; + __u32 bpp; + __u32 flags; + + __u32 handle; + __u32 pitch; + __u64 size; +}; + +/* set up for mmap of a dumb scanout buffer */ +struct drm_mode_map_dumb { + /** Handle for the object being mapped. */ + __u32 handle; + __u32 pad; + /** + * Fake offset to use for subsequent mmap call + * + * This is a fixed-size type for 32/64 compatibility. + */ + __u64 offset; +}; + +struct drm_mode_destroy_dumb { + __u32 handle; +}; + +/** + * DRM_MODE_ATOMIC_TEST_ONLY + * + * Do not apply the atomic commit, instead check whether the hardware supports + * this configuration. + * + * See &drm_mode_config_funcs.atomic_check for more details on test-only + * commits. + */ +#define DRM_MODE_ATOMIC_TEST_ONLY 0x0100 +/** + * DRM_MODE_ATOMIC_NONBLOCK + * + * Do not block while applying the atomic commit. The &DRM_IOCTL_MODE_ATOMIC + * IOCTL returns immediately instead of waiting for the changes to be applied + * in hardware. Note, the driver will still check that the update can be + * applied before retuning. + */ +#define DRM_MODE_ATOMIC_NONBLOCK 0x0200 +/** + * DRM_MODE_ATOMIC_ALLOW_MODESET + * + * Allow the update to result in temporary or transient visible artifacts while + * the update is being applied. Applying the update may also take significantly + * more time than a page flip. All visual artifacts will disappear by the time + * the update is completed, as signalled through the vblank event's timestamp + * (see struct drm_event_vblank). + * + * This flag must be set when the KMS update might cause visible artifacts. + * Without this flag such KMS update will return a EINVAL error. What kind of + * update may cause visible artifacts depends on the driver and the hardware. + * User-space that needs to know beforehand if an update might cause visible + * artifacts can use &DRM_MODE_ATOMIC_TEST_ONLY without + * &DRM_MODE_ATOMIC_ALLOW_MODESET to see if it fails. + * + * To the best of the driver's knowledge, visual artifacts are guaranteed to + * not appear when this flag is not set. Some sinks might display visual + * artifacts outside of the driver's control. + */ +#define DRM_MODE_ATOMIC_ALLOW_MODESET 0x0400 + +/** + * DRM_MODE_ATOMIC_FLAGS + * + * Bitfield of flags accepted by the &DRM_IOCTL_MODE_ATOMIC IOCTL in + * &drm_mode_atomic.flags. + */ +#define DRM_MODE_ATOMIC_FLAGS \ + (DRM_MODE_PAGE_FLIP_EVENT | DRM_MODE_PAGE_FLIP_ASYNC | \ + DRM_MODE_ATOMIC_TEST_ONLY | DRM_MODE_ATOMIC_NONBLOCK | \ + DRM_MODE_ATOMIC_ALLOW_MODESET) + +struct drm_mode_atomic { + __u32 flags; + __u32 count_objs; + __u64 objs_ptr; + __u64 count_props_ptr; + __u64 props_ptr; + __u64 prop_values_ptr; + __u64 reserved; + __u64 user_data; +}; + +struct drm_format_modifier_blob { +#define FORMAT_BLOB_CURRENT 1 + /* Version of this blob format */ + __u32 version; + + /* Flags */ + __u32 flags; + + /* Number of fourcc formats supported */ + __u32 count_formats; + + /* Where in this blob the formats exist (in bytes) */ + __u32 formats_offset; + + /* Number of drm_format_modifiers */ + __u32 count_modifiers; + + /* Where in this blob the modifiers exist (in bytes) */ + __u32 modifiers_offset; + + /* __u32 formats[] */ + /* struct drm_format_modifier modifiers[] */ +}; + +struct drm_format_modifier { + /* Bitmask of formats in get_plane format list this info applies to. The + * offset allows a sliding window of which 64 formats (bits). + * + * Some examples: + * In today's world with < 65 formats, and formats 0, and 2 are + * supported + * 0x0000000000000005 + * ^-offset = 0, formats = 5 + * + * If the number formats grew to 128, and formats 98-102 are + * supported with the modifier: + * + * 0x0000007c00000000 0000000000000000 + * ^ + * |__offset = 64, formats = 0x7c00000000 + * + */ + __u64 formats; + __u32 offset; + __u32 pad; + + /* The modifier that applies to the >get_plane format list bitmask. */ + __u64 modifier; +}; + +/** + * struct drm_mode_create_blob - Create New blob property + * + * Create a new 'blob' data property, copying length bytes from data pointer, + * and returning new blob ID. + */ +struct drm_mode_create_blob { + /** @data: Pointer to data to copy. */ + __u64 data; + /** @length: Length of data to copy. */ + __u32 length; + /** @blob_id: Return: new property ID. */ + __u32 blob_id; +}; + +/** + * struct drm_mode_destroy_blob - Destroy user blob + * @blob_id: blob_id to destroy + * + * Destroy a user-created blob property. + * + * User-space can release blobs as soon as they do not need to refer to them by + * their blob object ID. For instance, if you are using a MODE_ID blob in an + * atomic commit and you will not make another commit re-using the same ID, you + * can destroy the blob as soon as the commit has been issued, without waiting + * for it to complete. + */ +struct drm_mode_destroy_blob { + __u32 blob_id; +}; + +/** + * struct drm_mode_create_lease - Create lease + * + * Lease mode resources, creating another drm_master. + * + * The @object_ids array must reference at least one CRTC, one connector and + * one plane if &DRM_CLIENT_CAP_UNIVERSAL_PLANES is enabled. Alternatively, + * the lease can be completely empty. + */ +struct drm_mode_create_lease { + /** @object_ids: Pointer to array of object ids (__u32) */ + __u64 object_ids; + /** @object_count: Number of object ids */ + __u32 object_count; + /** @flags: flags for new FD (O_CLOEXEC, etc) */ + __u32 flags; + + /** @lessee_id: Return: unique identifier for lessee. */ + __u32 lessee_id; + /** @fd: Return: file descriptor to new drm_master file */ + __u32 fd; +}; + +/** + * struct drm_mode_list_lessees - List lessees + * + * List lesses from a drm_master. + */ +struct drm_mode_list_lessees { + /** + * @count_lessees: Number of lessees. + * + * On input, provides length of the array. + * On output, provides total number. No + * more than the input number will be written + * back, so two calls can be used to get + * the size and then the data. + */ + __u32 count_lessees; + /** @pad: Padding. */ + __u32 pad; + + /** + * @lessees_ptr: Pointer to lessees. + * + * Pointer to __u64 array of lessee ids + */ + __u64 lessees_ptr; +}; + +/** + * struct drm_mode_get_lease - Get Lease + * + * Get leased objects. + */ +struct drm_mode_get_lease { + /** + * @count_objects: Number of leased objects. + * + * On input, provides length of the array. + * On output, provides total number. No + * more than the input number will be written + * back, so two calls can be used to get + * the size and then the data. + */ + __u32 count_objects; + /** @pad: Padding. */ + __u32 pad; + + /** + * @objects_ptr: Pointer to objects. + * + * Pointer to __u32 array of object ids. + */ + __u64 objects_ptr; +}; + +/** + * struct drm_mode_revoke_lease - Revoke lease + */ +struct drm_mode_revoke_lease { + /** @lessee_id: Unique ID of lessee */ + __u32 lessee_id; +}; + +/** + * struct drm_mode_rect - Two dimensional rectangle. + * @x1: Horizontal starting coordinate (inclusive). + * @y1: Vertical starting coordinate (inclusive). + * @x2: Horizontal ending coordinate (exclusive). + * @y2: Vertical ending coordinate (exclusive). + * + * With drm subsystem using struct drm_rect to manage rectangular area this + * export it to user-space. + * + * Currently used by drm_mode_atomic blob property FB_DAMAGE_CLIPS. + */ +struct drm_mode_rect { + __s32 x1; + __s32 y1; + __s32 x2; + __s32 y2; +}; + +/** + * struct drm_mode_closefb + * @fb_id: Framebuffer ID. + * @pad: Must be zero. + */ +struct drm_mode_closefb { + __u32 fb_id; + __u32 pad; +}; + +#if defined(__cplusplus) +} +#endif + +#endif diff --git a/include/arch/x86_64/drm/drm_sarea.h b/include/arch/x86_64/drm/drm_sarea.h new file mode 100644 index 00000000..645d4925 --- /dev/null +++ b/include/arch/x86_64/drm/drm_sarea.h @@ -0,0 +1,94 @@ +/** + * \file drm_sarea.h + * \brief SAREA definitions + * + * \author Michel Dänzer <michel@daenzer.net> + */ + +/* + * Copyright 2002 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef _DRM_SAREA_H_ +#define _DRM_SAREA_H_ + +#include "drm.h" + +#if defined(__cplusplus) +extern "C" { +#endif + +/* SAREA area needs to be at least a page */ +#if defined(__alpha__) +#define SAREA_MAX 0x2000U +#elif defined(__mips__) +#define SAREA_MAX 0x4000U +#elif defined(__ia64__) +#define SAREA_MAX 0x10000U /* 64kB */ +#else +/* Intel 830M driver needs at least 8k SAREA */ +#define SAREA_MAX 0x2000U +#endif + +/** Maximum number of drawables in the SAREA */ +#define SAREA_MAX_DRAWABLES 256 + +#define SAREA_DRAWABLE_CLAIMED_ENTRY 0x80000000 + +/** SAREA drawable */ +struct drm_sarea_drawable { + unsigned int stamp; + unsigned int flags; +}; + +/** SAREA frame */ +struct drm_sarea_frame { + unsigned int x; + unsigned int y; + unsigned int width; + unsigned int height; + unsigned int fullscreen; +}; + +/** SAREA */ +struct drm_sarea { + /** first thing is always the DRM locking structure */ + struct drm_hw_lock lock; + /** \todo Use readers/writer lock for drm_sarea::drawable_lock */ + struct drm_hw_lock drawable_lock; + struct drm_sarea_drawable drawableTable[SAREA_MAX_DRAWABLES]; /**< + drawables + */ + struct drm_sarea_frame frame; /**< frame */ + drm_context_t dummy_context; +}; + +typedef struct drm_sarea_drawable drm_sarea_drawable_t; +typedef struct drm_sarea_frame drm_sarea_frame_t; +typedef struct drm_sarea drm_sarea_t; + +#if defined(__cplusplus) +} +#endif + +#endif /* _DRM_SAREA_H_ */ diff --git a/include/arch/x86_64/drm/etnaviv_drm.h b/include/arch/x86_64/drm/etnaviv_drm.h new file mode 100644 index 00000000..7ed02dcc --- /dev/null +++ b/include/arch/x86_64/drm/etnaviv_drm.h @@ -0,0 +1,321 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Copyright (C) 2015 Etnaviv Project + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef __ETNAVIV_DRM_H__ +#define __ETNAVIV_DRM_H__ + +#include "drm.h" + +#if defined(__cplusplus) +extern "C" { +#endif + +/* Please note that modifications to all structs defined here are + * subject to backwards-compatibility constraints: + * 1) Do not use pointers, use __u64 instead for 32 bit / 64 bit + * user/kernel compatibility + * 2) Keep fields aligned to their size + * 3) Because of how drm_ioctl() works, we can add new fields at + * the end of an ioctl if some care is taken: drm_ioctl() will + * zero out the new fields at the tail of the ioctl, so a zero + * value should have a backwards compatible meaning. And for + * output params, userspace won't see the newly added output + * fields.. so that has to be somehow ok. + */ + +/* timeouts are specified in clock-monotonic absolute times (to simplify + * restarting interrupted ioctls). The following struct is logically the + * same as 'struct timespec' but 32/64b ABI safe. + */ +struct drm_etnaviv_timespec { + __s64 tv_sec; /* seconds */ + __s64 tv_nsec; /* nanoseconds */ +}; + +#define ETNAVIV_PARAM_GPU_MODEL 0x01 +#define ETNAVIV_PARAM_GPU_REVISION 0x02 +#define ETNAVIV_PARAM_GPU_FEATURES_0 0x03 +#define ETNAVIV_PARAM_GPU_FEATURES_1 0x04 +#define ETNAVIV_PARAM_GPU_FEATURES_2 0x05 +#define ETNAVIV_PARAM_GPU_FEATURES_3 0x06 +#define ETNAVIV_PARAM_GPU_FEATURES_4 0x07 +#define ETNAVIV_PARAM_GPU_FEATURES_5 0x08 +#define ETNAVIV_PARAM_GPU_FEATURES_6 0x09 +#define ETNAVIV_PARAM_GPU_FEATURES_7 0x0a +#define ETNAVIV_PARAM_GPU_FEATURES_8 0x0b +#define ETNAVIV_PARAM_GPU_FEATURES_9 0x0c +#define ETNAVIV_PARAM_GPU_FEATURES_10 0x0d +#define ETNAVIV_PARAM_GPU_FEATURES_11 0x0e +#define ETNAVIV_PARAM_GPU_FEATURES_12 0x0f + +#define ETNAVIV_PARAM_GPU_STREAM_COUNT 0x10 +#define ETNAVIV_PARAM_GPU_REGISTER_MAX 0x11 +#define ETNAVIV_PARAM_GPU_THREAD_COUNT 0x12 +#define ETNAVIV_PARAM_GPU_VERTEX_CACHE_SIZE 0x13 +#define ETNAVIV_PARAM_GPU_SHADER_CORE_COUNT 0x14 +#define ETNAVIV_PARAM_GPU_PIXEL_PIPES 0x15 +#define ETNAVIV_PARAM_GPU_VERTEX_OUTPUT_BUFFER_SIZE 0x16 +#define ETNAVIV_PARAM_GPU_BUFFER_SIZE 0x17 +#define ETNAVIV_PARAM_GPU_INSTRUCTION_COUNT 0x18 +#define ETNAVIV_PARAM_GPU_NUM_CONSTANTS 0x19 +#define ETNAVIV_PARAM_GPU_NUM_VARYINGS 0x1a +#define ETNAVIV_PARAM_SOFTPIN_START_ADDR 0x1b +#define ETNAVIV_PARAM_GPU_PRODUCT_ID 0x1c +#define ETNAVIV_PARAM_GPU_CUSTOMER_ID 0x1d +#define ETNAVIV_PARAM_GPU_ECO_ID 0x1e + +#define ETNA_MAX_PIPES 4 + +struct drm_etnaviv_param { + __u32 pipe; /* in */ + __u32 param; /* in, ETNAVIV_PARAM_x */ + __u64 value; /* out (get_param) or in (set_param) */ +}; + +/* + * GEM buffers: + */ + +#define ETNA_BO_CACHE_MASK 0x000f0000 +/* cache modes */ +#define ETNA_BO_CACHED 0x00010000 +#define ETNA_BO_WC 0x00020000 +#define ETNA_BO_UNCACHED 0x00040000 +/* map flags */ +#define ETNA_BO_FORCE_MMU 0x00100000 + +struct drm_etnaviv_gem_new { + __u64 size; /* in */ + __u32 flags; /* in, mask of ETNA_BO_x */ + __u32 handle; /* out */ +}; + +struct drm_etnaviv_gem_info { + __u32 handle; /* in */ + __u32 pad; + __u64 offset; /* out, offset to pass to mmap() */ +}; + +#define ETNA_PREP_READ 0x01 +#define ETNA_PREP_WRITE 0x02 +#define ETNA_PREP_NOSYNC 0x04 + +struct drm_etnaviv_gem_cpu_prep { + __u32 handle; /* in */ + __u32 op; /* in, mask of ETNA_PREP_x */ + struct drm_etnaviv_timespec timeout; /* in */ +}; + +struct drm_etnaviv_gem_cpu_fini { + __u32 handle; /* in */ + __u32 flags; /* in, placeholder for now, no defined values */ +}; + +/* + * Cmdstream Submission: + */ + +/* The value written into the cmdstream is logically: + * relocbuf->gpuaddr + reloc_offset + * + * NOTE that reloc's must be sorted by order of increasing submit_offset, + * otherwise EINVAL. + */ +struct drm_etnaviv_gem_submit_reloc { + __u32 submit_offset; /* in, offset from submit_bo */ + __u32 reloc_idx; /* in, index of reloc_bo buffer */ + __u64 reloc_offset; /* in, offset from start of reloc_bo */ + __u32 flags; /* in, placeholder for now, no defined values */ +}; + +/* Each buffer referenced elsewhere in the cmdstream submit (ie. the + * cmdstream buffer(s) themselves or reloc entries) has one (and only + * one) entry in the submit->bos[] table. + * + * As a optimization, the current buffer (gpu virtual address) can be + * passed back through the 'presumed' field. If on a subsequent reloc, + * userspace passes back a 'presumed' address that is still valid, + * then patching the cmdstream for this entry is skipped. This can + * avoid kernel needing to map/access the cmdstream bo in the common + * case. + * If the submit is a softpin submit (ETNA_SUBMIT_SOFTPIN) the 'presumed' + * field is interpreted as the fixed location to map the bo into the gpu + * virtual address space. If the kernel is unable to map the buffer at + * this location the submit will fail. This means userspace is responsible + * for the whole gpu virtual address management. + */ +#define ETNA_SUBMIT_BO_READ 0x0001 +#define ETNA_SUBMIT_BO_WRITE 0x0002 +struct drm_etnaviv_gem_submit_bo { + __u32 flags; /* in, mask of ETNA_SUBMIT_BO_x */ + __u32 handle; /* in, GEM handle */ + __u64 presumed; /* in/out, presumed buffer address */ +}; + +/* performance monitor request (pmr) */ +#define ETNA_PM_PROCESS_PRE 0x0001 +#define ETNA_PM_PROCESS_POST 0x0002 +struct drm_etnaviv_gem_submit_pmr { + __u32 flags; /* in, when to process request (ETNA_PM_PROCESS_x) */ + __u8 domain; /* in, pm domain */ + __u8 pad; + __u16 signal; /* in, pm signal */ + __u32 sequence; /* in, sequence number */ + __u32 read_offset; /* in, offset from read_bo */ + __u32 read_idx; /* in, index of read_bo buffer */ +}; + +/* Each cmdstream submit consists of a table of buffers involved, and + * one or more cmdstream buffers. This allows for conditional execution + * (context-restore), and IB buffers needed for per tile/bin draw cmds. + */ +#define ETNA_SUBMIT_NO_IMPLICIT 0x0001 +#define ETNA_SUBMIT_FENCE_FD_IN 0x0002 +#define ETNA_SUBMIT_FENCE_FD_OUT 0x0004 +#define ETNA_SUBMIT_SOFTPIN 0x0008 +#define ETNA_SUBMIT_FLAGS \ + (ETNA_SUBMIT_NO_IMPLICIT | ETNA_SUBMIT_FENCE_FD_IN | \ + ETNA_SUBMIT_FENCE_FD_OUT | ETNA_SUBMIT_SOFTPIN) +#define ETNA_PIPE_3D 0x00 +#define ETNA_PIPE_2D 0x01 +#define ETNA_PIPE_VG 0x02 +struct drm_etnaviv_gem_submit { + __u32 fence; /* out */ + __u32 pipe; /* in */ + __u32 exec_state; /* in, initial execution state (ETNA_PIPE_x) */ + __u32 nr_bos; /* in, number of submit_bo's */ + __u32 nr_relocs; /* in, number of submit_reloc's */ + __u32 stream_size; /* in, cmdstream size */ + __u64 bos; /* in, ptr to array of submit_bo's */ + __u64 relocs; /* in, ptr to array of submit_reloc's */ + __u64 stream; /* in, ptr to cmdstream */ + __u32 flags; /* in, mask of ETNA_SUBMIT_x */ + __s32 fence_fd; /* in/out, fence fd (see ETNA_SUBMIT_FENCE_FD_x) */ + __u64 pmrs; /* in, ptr to array of submit_pmr's */ + __u32 nr_pmrs; /* in, number of submit_pmr's */ + __u32 pad; +}; + +/* The normal way to synchronize with the GPU is just to CPU_PREP on + * a buffer if you need to access it from the CPU (other cmdstream + * submission from same or other contexts, PAGE_FLIP ioctl, etc, all + * handle the required synchronization under the hood). This ioctl + * mainly just exists as a way to implement the gallium pipe_fence + * APIs without requiring a dummy bo to synchronize on. + */ +#define ETNA_WAIT_NONBLOCK 0x01 +struct drm_etnaviv_wait_fence { + __u32 pipe; /* in */ + __u32 fence; /* in */ + __u32 flags; /* in, mask of ETNA_WAIT_x */ + __u32 pad; + struct drm_etnaviv_timespec timeout; /* in */ +}; + +#define ETNA_USERPTR_READ 0x01 +#define ETNA_USERPTR_WRITE 0x02 +struct drm_etnaviv_gem_userptr { + __u64 user_ptr; /* in, page aligned user pointer */ + __u64 user_size; /* in, page aligned user size */ + __u32 flags; /* in, flags */ + __u32 handle; /* out, non-zero handle */ +}; + +struct drm_etnaviv_gem_wait { + __u32 pipe; /* in */ + __u32 handle; /* in, bo to be waited for */ + __u32 flags; /* in, mask of ETNA_WAIT_x */ + __u32 pad; + struct drm_etnaviv_timespec timeout; /* in */ +}; + +/* + * Performance Monitor (PM): + */ + +struct drm_etnaviv_pm_domain { + __u32 pipe; /* in */ + __u8 iter; /* in/out, select pm domain at index iter */ + __u8 id; /* out, id of domain */ + __u16 nr_signals; /* out, how many signals does this domain provide */ + char name[64]; /* out, name of domain */ +}; + +struct drm_etnaviv_pm_signal { + __u32 pipe; /* in */ + __u8 domain; /* in, pm domain index */ + __u8 pad; + __u16 iter; /* in/out, select pm source at index iter */ + __u16 id; /* out, id of signal */ + char name[64]; /* out, name of domain */ +}; + +#define DRM_ETNAVIV_GET_PARAM 0x00 +/* placeholder: +#define DRM_ETNAVIV_SET_PARAM 0x01 + */ +#define DRM_ETNAVIV_GEM_NEW 0x02 +#define DRM_ETNAVIV_GEM_INFO 0x03 +#define DRM_ETNAVIV_GEM_CPU_PREP 0x04 +#define DRM_ETNAVIV_GEM_CPU_FINI 0x05 +#define DRM_ETNAVIV_GEM_SUBMIT 0x06 +#define DRM_ETNAVIV_WAIT_FENCE 0x07 +#define DRM_ETNAVIV_GEM_USERPTR 0x08 +#define DRM_ETNAVIV_GEM_WAIT 0x09 +#define DRM_ETNAVIV_PM_QUERY_DOM 0x0a +#define DRM_ETNAVIV_PM_QUERY_SIG 0x0b +#define DRM_ETNAVIV_NUM_IOCTLS 0x0c + +#define DRM_IOCTL_ETNAVIV_GET_PARAM \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_ETNAVIV_GET_PARAM, \ + struct drm_etnaviv_param) +#define DRM_IOCTL_ETNAVIV_GEM_NEW \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_ETNAVIV_GEM_NEW, \ + struct drm_etnaviv_gem_new) +#define DRM_IOCTL_ETNAVIV_GEM_INFO \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_ETNAVIV_GEM_INFO, \ + struct drm_etnaviv_gem_info) +#define DRM_IOCTL_ETNAVIV_GEM_CPU_PREP \ + DRM_IOW(DRM_COMMAND_BASE + DRM_ETNAVIV_GEM_CPU_PREP, \ + struct drm_etnaviv_gem_cpu_prep) +#define DRM_IOCTL_ETNAVIV_GEM_CPU_FINI \ + DRM_IOW(DRM_COMMAND_BASE + DRM_ETNAVIV_GEM_CPU_FINI, \ + struct drm_etnaviv_gem_cpu_fini) +#define DRM_IOCTL_ETNAVIV_GEM_SUBMIT \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_ETNAVIV_GEM_SUBMIT, \ + struct drm_etnaviv_gem_submit) +#define DRM_IOCTL_ETNAVIV_WAIT_FENCE \ + DRM_IOW(DRM_COMMAND_BASE + DRM_ETNAVIV_WAIT_FENCE, \ + struct drm_etnaviv_wait_fence) +#define DRM_IOCTL_ETNAVIV_GEM_USERPTR \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_ETNAVIV_GEM_USERPTR, \ + struct drm_etnaviv_gem_userptr) +#define DRM_IOCTL_ETNAVIV_GEM_WAIT \ + DRM_IOW(DRM_COMMAND_BASE + DRM_ETNAVIV_GEM_WAIT, \ + struct drm_etnaviv_gem_wait) +#define DRM_IOCTL_ETNAVIV_PM_QUERY_DOM \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_ETNAVIV_PM_QUERY_DOM, \ + struct drm_etnaviv_pm_domain) +#define DRM_IOCTL_ETNAVIV_PM_QUERY_SIG \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_ETNAVIV_PM_QUERY_SIG, \ + struct drm_etnaviv_pm_signal) + +#if defined(__cplusplus) +} +#endif + +#endif /* __ETNAVIV_DRM_H__ */ diff --git a/include/arch/x86_64/drm/exynos_drm.h b/include/arch/x86_64/drm/exynos_drm.h new file mode 100644 index 00000000..414cabdc --- /dev/null +++ b/include/arch/x86_64/drm/exynos_drm.h @@ -0,0 +1,433 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +/* exynos_drm.h + * + * Copyright (c) 2011 Samsung Electronics Co., Ltd. + * Authors: + * Inki Dae <inki.dae@samsung.com> + * Joonyoung Shim <jy0922.shim@samsung.com> + * Seung-Woo Kim <sw0312.kim@samsung.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#ifndef _EXYNOS_DRM_H_ +#define _EXYNOS_DRM_H_ + +#include "drm.h" + +#if defined(__cplusplus) +extern "C" { +#endif + +/** + * User-desired buffer creation information structure. + * + * @size: user-desired memory allocation size. + * - this size value would be page-aligned internally. + * @flags: user request for setting memory type or cache attributes. + * @handle: returned a handle to created gem object. + * - this handle will be set by gem module of kernel side. + */ +struct drm_exynos_gem_create { + __u64 size; + __u32 flags; + __u32 handle; +}; + +/** + * A structure for getting a fake-offset that can be used with mmap. + * + * @handle: handle of gem object. + * @reserved: just padding to be 64-bit aligned. + * @offset: a fake-offset of gem object. + */ +struct drm_exynos_gem_map { + __u32 handle; + __u32 reserved; + __u64 offset; +}; + +/** + * A structure to gem information. + * + * @handle: a handle to gem object created. + * @flags: flag value including memory type and cache attribute and + * this value would be set by driver. + * @size: size to memory region allocated by gem and this size would + * be set by driver. + */ +struct drm_exynos_gem_info { + __u32 handle; + __u32 flags; + __u64 size; +}; + +/** + * A structure for user connection request of virtual display. + * + * @connection: indicate whether doing connection or not by user. + * @extensions: if this value is 1 then the vidi driver would need additional + * 128bytes edid data. + * @edid: the edid data pointer from user side. + */ +struct drm_exynos_vidi_connection { + __u32 connection; + __u32 extensions; + __u64 edid; +}; + +/* memory type definitions. */ +enum e_drm_exynos_gem_mem_type { + /* Physically Continuous memory and used as default. */ + EXYNOS_BO_CONTIG = 0 << 0, + /* Physically Non-Continuous memory. */ + EXYNOS_BO_NONCONTIG = 1 << 0, + /* non-cachable mapping and used as default. */ + EXYNOS_BO_NONCACHABLE = 0 << 1, + /* cachable mapping. */ + EXYNOS_BO_CACHABLE = 1 << 1, + /* write-combine mapping. */ + EXYNOS_BO_WC = 1 << 2, + EXYNOS_BO_MASK = EXYNOS_BO_NONCONTIG | EXYNOS_BO_CACHABLE | EXYNOS_BO_WC +}; + +struct drm_exynos_g2d_get_ver { + __u32 major; + __u32 minor; +}; + +struct drm_exynos_g2d_cmd { + __u32 offset; + __u32 data; +}; + +enum drm_exynos_g2d_buf_type { + G2D_BUF_USERPTR = 1 << 31, +}; + +enum drm_exynos_g2d_event_type { + G2D_EVENT_NOT, + G2D_EVENT_NONSTOP, + G2D_EVENT_STOP, /* not yet */ +}; + +struct drm_exynos_g2d_userptr { + unsigned long userptr; + unsigned long size; +}; + +struct drm_exynos_g2d_set_cmdlist { + __u64 cmd; + __u64 cmd_buf; + __u32 cmd_nr; + __u32 cmd_buf_nr; + + /* for g2d event */ + __u64 event_type; + __u64 user_data; +}; + +struct drm_exynos_g2d_exec { + __u64 async; +}; + +/* Exynos DRM IPP v2 API */ + +/** + * Enumerate available IPP hardware modules. + * + * @count_ipps: size of ipp_id array / number of ipp modules (set by driver) + * @reserved: padding + * @ipp_id_ptr: pointer to ipp_id array or NULL + */ +struct drm_exynos_ioctl_ipp_get_res { + __u32 count_ipps; + __u32 reserved; + __u64 ipp_id_ptr; +}; + +enum drm_exynos_ipp_format_type { + DRM_EXYNOS_IPP_FORMAT_SOURCE = 0x01, + DRM_EXYNOS_IPP_FORMAT_DESTINATION = 0x02, +}; + +struct drm_exynos_ipp_format { + __u32 fourcc; + __u32 type; + __u64 modifier; +}; + +enum drm_exynos_ipp_capability { + DRM_EXYNOS_IPP_CAP_CROP = 0x01, + DRM_EXYNOS_IPP_CAP_ROTATE = 0x02, + DRM_EXYNOS_IPP_CAP_SCALE = 0x04, + DRM_EXYNOS_IPP_CAP_CONVERT = 0x08, +}; + +/** + * Get IPP hardware capabilities and supported image formats. + * + * @ipp_id: id of IPP module to query + * @capabilities: bitmask of drm_exynos_ipp_capability (set by driver) + * @reserved: padding + * @formats_count: size of formats array (in entries) / number of filled + * formats (set by driver) + * @formats_ptr: pointer to formats array or NULL + */ +struct drm_exynos_ioctl_ipp_get_caps { + __u32 ipp_id; + __u32 capabilities; + __u32 reserved; + __u32 formats_count; + __u64 formats_ptr; +}; + +enum drm_exynos_ipp_limit_type { + /* size (horizontal/vertial) limits, in pixels (min, max, alignment) */ + DRM_EXYNOS_IPP_LIMIT_TYPE_SIZE = 0x0001, + /* scale ratio (horizonta/vertial), 16.16 fixed point (min, max) */ + DRM_EXYNOS_IPP_LIMIT_TYPE_SCALE = 0x0002, + + /* image buffer area */ + DRM_EXYNOS_IPP_LIMIT_SIZE_BUFFER = 0x0001 << 16, + /* src/dst rectangle area */ + DRM_EXYNOS_IPP_LIMIT_SIZE_AREA = 0x0002 << 16, + /* src/dst rectangle area when rotation enabled */ + DRM_EXYNOS_IPP_LIMIT_SIZE_ROTATED = 0x0003 << 16, + + DRM_EXYNOS_IPP_LIMIT_TYPE_MASK = 0x000f, + DRM_EXYNOS_IPP_LIMIT_SIZE_MASK = 0x000f << 16, +}; + +struct drm_exynos_ipp_limit_val { + __u32 min; + __u32 max; + __u32 align; + __u32 reserved; +}; + +/** + * IPP module limitation. + * + * @type: limit type (see drm_exynos_ipp_limit_type enum) + * @reserved: padding + * @h: horizontal limits + * @v: vertical limits + */ +struct drm_exynos_ipp_limit { + __u32 type; + __u32 reserved; + struct drm_exynos_ipp_limit_val h; + struct drm_exynos_ipp_limit_val v; +}; + +/** + * Get IPP limits for given image format. + * + * @ipp_id: id of IPP module to query + * @fourcc: image format code (see DRM_FORMAT_* in drm_fourcc.h) + * @modifier: image format modifier (see DRM_FORMAT_MOD_* in drm_fourcc.h) + * @type: source/destination identifier (drm_exynos_ipp_format_flag enum) + * @limits_count: size of limits array (in entries) / number of filled entries + * (set by driver) + * @limits_ptr: pointer to limits array or NULL + */ +struct drm_exynos_ioctl_ipp_get_limits { + __u32 ipp_id; + __u32 fourcc; + __u64 modifier; + __u32 type; + __u32 limits_count; + __u64 limits_ptr; +}; + +enum drm_exynos_ipp_task_id { + /* buffer described by struct drm_exynos_ipp_task_buffer */ + DRM_EXYNOS_IPP_TASK_BUFFER = 0x0001, + /* rectangle described by struct drm_exynos_ipp_task_rect */ + DRM_EXYNOS_IPP_TASK_RECTANGLE = 0x0002, + /* transformation described by struct drm_exynos_ipp_task_transform */ + DRM_EXYNOS_IPP_TASK_TRANSFORM = 0x0003, + /* alpha configuration described by struct drm_exynos_ipp_task_alpha */ + DRM_EXYNOS_IPP_TASK_ALPHA = 0x0004, + + /* source image data (for buffer and rectangle chunks) */ + DRM_EXYNOS_IPP_TASK_TYPE_SOURCE = 0x0001 << 16, + /* destination image data (for buffer and rectangle chunks) */ + DRM_EXYNOS_IPP_TASK_TYPE_DESTINATION = 0x0002 << 16, +}; + +/** + * Memory buffer with image data. + * + * @id: must be DRM_EXYNOS_IPP_TASK_BUFFER + * other parameters are same as for AddFB2 generic DRM ioctl + */ +struct drm_exynos_ipp_task_buffer { + __u32 id; + __u32 fourcc; + __u32 width, height; + __u32 gem_id[4]; + __u32 offset[4]; + __u32 pitch[4]; + __u64 modifier; +}; + +/** + * Rectangle for processing. + * + * @id: must be DRM_EXYNOS_IPP_TASK_RECTANGLE + * @reserved: padding + * @x,@y: left corner in pixels + * @w,@h: width/height in pixels + */ +struct drm_exynos_ipp_task_rect { + __u32 id; + __u32 reserved; + __u32 x; + __u32 y; + __u32 w; + __u32 h; +}; + +/** + * Image tranformation description. + * + * @id: must be DRM_EXYNOS_IPP_TASK_TRANSFORM + * @rotation: DRM_MODE_ROTATE_* and DRM_MODE_REFLECT_* values + */ +struct drm_exynos_ipp_task_transform { + __u32 id; + __u32 rotation; +}; + +/** + * Image global alpha configuration for formats without alpha values. + * + * @id: must be DRM_EXYNOS_IPP_TASK_ALPHA + * @value: global alpha value (0-255) + */ +struct drm_exynos_ipp_task_alpha { + __u32 id; + __u32 value; +}; + +enum drm_exynos_ipp_flag { + /* generate DRM event after processing */ + DRM_EXYNOS_IPP_FLAG_EVENT = 0x01, + /* dry run, only check task parameters */ + DRM_EXYNOS_IPP_FLAG_TEST_ONLY = 0x02, + /* non-blocking processing */ + DRM_EXYNOS_IPP_FLAG_NONBLOCK = 0x04, +}; + +#define DRM_EXYNOS_IPP_FLAGS \ + (DRM_EXYNOS_IPP_FLAG_EVENT | DRM_EXYNOS_IPP_FLAG_TEST_ONLY | \ + DRM_EXYNOS_IPP_FLAG_NONBLOCK) + +/** + * Perform image processing described by array of drm_exynos_ipp_task_* + * structures (parameters array). + * + * @ipp_id: id of IPP module to run the task + * @flags: bitmask of drm_exynos_ipp_flag values + * @reserved: padding + * @params_size: size of parameters array (in bytes) + * @params_ptr: pointer to parameters array or NULL + * @user_data: (optional) data for drm event + */ +struct drm_exynos_ioctl_ipp_commit { + __u32 ipp_id; + __u32 flags; + __u32 reserved; + __u32 params_size; + __u64 params_ptr; + __u64 user_data; +}; + +#define DRM_EXYNOS_GEM_CREATE 0x00 +#define DRM_EXYNOS_GEM_MAP 0x01 +/* Reserved 0x03 ~ 0x05 for exynos specific gem ioctl */ +#define DRM_EXYNOS_GEM_GET 0x04 +#define DRM_EXYNOS_VIDI_CONNECTION 0x07 + +/* G2D */ +#define DRM_EXYNOS_G2D_GET_VER 0x20 +#define DRM_EXYNOS_G2D_SET_CMDLIST 0x21 +#define DRM_EXYNOS_G2D_EXEC 0x22 + +/* Reserved 0x30 ~ 0x33 for obsolete Exynos IPP ioctls */ +/* IPP - Image Post Processing */ +#define DRM_EXYNOS_IPP_GET_RESOURCES 0x40 +#define DRM_EXYNOS_IPP_GET_CAPS 0x41 +#define DRM_EXYNOS_IPP_GET_LIMITS 0x42 +#define DRM_EXYNOS_IPP_COMMIT 0x43 + +#define DRM_IOCTL_EXYNOS_GEM_CREATE \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_EXYNOS_GEM_CREATE, \ + struct drm_exynos_gem_create) +#define DRM_IOCTL_EXYNOS_GEM_MAP \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_EXYNOS_GEM_MAP, \ + struct drm_exynos_gem_map) +#define DRM_IOCTL_EXYNOS_GEM_GET \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_EXYNOS_GEM_GET, \ + struct drm_exynos_gem_info) + +#define DRM_IOCTL_EXYNOS_VIDI_CONNECTION \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_EXYNOS_VIDI_CONNECTION, \ + struct drm_exynos_vidi_connection) + +#define DRM_IOCTL_EXYNOS_G2D_GET_VER \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_EXYNOS_G2D_GET_VER, \ + struct drm_exynos_g2d_get_ver) +#define DRM_IOCTL_EXYNOS_G2D_SET_CMDLIST \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_EXYNOS_G2D_SET_CMDLIST, \ + struct drm_exynos_g2d_set_cmdlist) +#define DRM_IOCTL_EXYNOS_G2D_EXEC \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_EXYNOS_G2D_EXEC, \ + struct drm_exynos_g2d_exec) + +#define DRM_IOCTL_EXYNOS_IPP_GET_RESOURCES \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_EXYNOS_IPP_GET_RESOURCES, \ + struct drm_exynos_ioctl_ipp_get_res) +#define DRM_IOCTL_EXYNOS_IPP_GET_CAPS \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_EXYNOS_IPP_GET_CAPS, \ + struct drm_exynos_ioctl_ipp_get_caps) +#define DRM_IOCTL_EXYNOS_IPP_GET_LIMITS \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_EXYNOS_IPP_GET_LIMITS, \ + struct drm_exynos_ioctl_ipp_get_limits) +#define DRM_IOCTL_EXYNOS_IPP_COMMIT \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_EXYNOS_IPP_COMMIT, \ + struct drm_exynos_ioctl_ipp_commit) + +/* Exynos specific events */ +#define DRM_EXYNOS_G2D_EVENT 0x80000000 +#define DRM_EXYNOS_IPP_EVENT 0x80000002 + +struct drm_exynos_g2d_event { + struct drm_event base; + __u64 user_data; + __u32 tv_sec; + __u32 tv_usec; + __u32 cmdlist_no; + __u32 reserved; +}; + +struct drm_exynos_ipp_event { + struct drm_event base; + __u64 user_data; + __u32 tv_sec; + __u32 tv_usec; + __u32 ipp_id; + __u32 sequence; + __u64 reserved; +}; + +#if defined(__cplusplus) +} +#endif + +#endif /* _EXYNOS_DRM_H_ */ diff --git a/include/arch/x86_64/drm/habanalabs_accel.h b/include/arch/x86_64/drm/habanalabs_accel.h new file mode 100644 index 00000000..60b1f21d --- /dev/null +++ b/include/arch/x86_64/drm/habanalabs_accel.h @@ -0,0 +1,2399 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + * + * Copyright 2016-2023 HabanaLabs, Ltd. + * All Rights Reserved. + * + */ + +#ifndef HABANALABS_H_ +#define HABANALABS_H_ + +#include <drm/drm.h> + +/* + * Defines that are asic-specific but constitutes as ABI between kernel driver + * and userspace + */ +#define GOYA_KMD_SRAM_RESERVED_SIZE_FROM_START 0x8000 /* 32KB */ +#define GAUDI_DRIVER_SRAM_RESERVED_SIZE_FROM_START 0x80 /* 128 bytes */ + +/* + * 128 SOBs reserved for collective wait + * 16 SOBs reserved for sync stream + */ +#define GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT 144 + +/* + * 64 monitors reserved for collective wait + * 8 monitors reserved for sync stream + */ +#define GAUDI_FIRST_AVAILABLE_W_S_MONITOR 72 + +/* Max number of elements in timestamps registration buffers */ +#define TS_MAX_ELEMENTS_NUM (1 << 20) /* 1MB */ + +/* + * Goya queue Numbering + * + * The external queues (PCI DMA channels) MUST be before the internal queues + * and each group (PCI DMA channels and internal) must be contiguous inside + * itself but there can be a gap between the two groups (although not + * recommended) + */ + +enum goya_queue_id { + GOYA_QUEUE_ID_DMA_0 = 0, + GOYA_QUEUE_ID_DMA_1 = 1, + GOYA_QUEUE_ID_DMA_2 = 2, + GOYA_QUEUE_ID_DMA_3 = 3, + GOYA_QUEUE_ID_DMA_4 = 4, + GOYA_QUEUE_ID_CPU_PQ = 5, + GOYA_QUEUE_ID_MME = 6, /* Internal queues start here */ + GOYA_QUEUE_ID_TPC0 = 7, + GOYA_QUEUE_ID_TPC1 = 8, + GOYA_QUEUE_ID_TPC2 = 9, + GOYA_QUEUE_ID_TPC3 = 10, + GOYA_QUEUE_ID_TPC4 = 11, + GOYA_QUEUE_ID_TPC5 = 12, + GOYA_QUEUE_ID_TPC6 = 13, + GOYA_QUEUE_ID_TPC7 = 14, + GOYA_QUEUE_ID_SIZE +}; + +/* + * Gaudi queue Numbering + * External queues (PCI DMA channels) are DMA_0_*, DMA_1_* and DMA_5_*. + * Except one CPU queue, all the rest are internal queues. + */ + +enum gaudi_queue_id { + GAUDI_QUEUE_ID_DMA_0_0 = 0, /* external */ + GAUDI_QUEUE_ID_DMA_0_1 = 1, /* external */ + GAUDI_QUEUE_ID_DMA_0_2 = 2, /* external */ + GAUDI_QUEUE_ID_DMA_0_3 = 3, /* external */ + GAUDI_QUEUE_ID_DMA_1_0 = 4, /* external */ + GAUDI_QUEUE_ID_DMA_1_1 = 5, /* external */ + GAUDI_QUEUE_ID_DMA_1_2 = 6, /* external */ + GAUDI_QUEUE_ID_DMA_1_3 = 7, /* external */ + GAUDI_QUEUE_ID_CPU_PQ = 8, /* CPU */ + GAUDI_QUEUE_ID_DMA_2_0 = 9, /* internal */ + GAUDI_QUEUE_ID_DMA_2_1 = 10, /* internal */ + GAUDI_QUEUE_ID_DMA_2_2 = 11, /* internal */ + GAUDI_QUEUE_ID_DMA_2_3 = 12, /* internal */ + GAUDI_QUEUE_ID_DMA_3_0 = 13, /* internal */ + GAUDI_QUEUE_ID_DMA_3_1 = 14, /* internal */ + GAUDI_QUEUE_ID_DMA_3_2 = 15, /* internal */ + GAUDI_QUEUE_ID_DMA_3_3 = 16, /* internal */ + GAUDI_QUEUE_ID_DMA_4_0 = 17, /* internal */ + GAUDI_QUEUE_ID_DMA_4_1 = 18, /* internal */ + GAUDI_QUEUE_ID_DMA_4_2 = 19, /* internal */ + GAUDI_QUEUE_ID_DMA_4_3 = 20, /* internal */ + GAUDI_QUEUE_ID_DMA_5_0 = 21, /* internal */ + GAUDI_QUEUE_ID_DMA_5_1 = 22, /* internal */ + GAUDI_QUEUE_ID_DMA_5_2 = 23, /* internal */ + GAUDI_QUEUE_ID_DMA_5_3 = 24, /* internal */ + GAUDI_QUEUE_ID_DMA_6_0 = 25, /* internal */ + GAUDI_QUEUE_ID_DMA_6_1 = 26, /* internal */ + GAUDI_QUEUE_ID_DMA_6_2 = 27, /* internal */ + GAUDI_QUEUE_ID_DMA_6_3 = 28, /* internal */ + GAUDI_QUEUE_ID_DMA_7_0 = 29, /* internal */ + GAUDI_QUEUE_ID_DMA_7_1 = 30, /* internal */ + GAUDI_QUEUE_ID_DMA_7_2 = 31, /* internal */ + GAUDI_QUEUE_ID_DMA_7_3 = 32, /* internal */ + GAUDI_QUEUE_ID_MME_0_0 = 33, /* internal */ + GAUDI_QUEUE_ID_MME_0_1 = 34, /* internal */ + GAUDI_QUEUE_ID_MME_0_2 = 35, /* internal */ + GAUDI_QUEUE_ID_MME_0_3 = 36, /* internal */ + GAUDI_QUEUE_ID_MME_1_0 = 37, /* internal */ + GAUDI_QUEUE_ID_MME_1_1 = 38, /* internal */ + GAUDI_QUEUE_ID_MME_1_2 = 39, /* internal */ + GAUDI_QUEUE_ID_MME_1_3 = 40, /* internal */ + GAUDI_QUEUE_ID_TPC_0_0 = 41, /* internal */ + GAUDI_QUEUE_ID_TPC_0_1 = 42, /* internal */ + GAUDI_QUEUE_ID_TPC_0_2 = 43, /* internal */ + GAUDI_QUEUE_ID_TPC_0_3 = 44, /* internal */ + GAUDI_QUEUE_ID_TPC_1_0 = 45, /* internal */ + GAUDI_QUEUE_ID_TPC_1_1 = 46, /* internal */ + GAUDI_QUEUE_ID_TPC_1_2 = 47, /* internal */ + GAUDI_QUEUE_ID_TPC_1_3 = 48, /* internal */ + GAUDI_QUEUE_ID_TPC_2_0 = 49, /* internal */ + GAUDI_QUEUE_ID_TPC_2_1 = 50, /* internal */ + GAUDI_QUEUE_ID_TPC_2_2 = 51, /* internal */ + GAUDI_QUEUE_ID_TPC_2_3 = 52, /* internal */ + GAUDI_QUEUE_ID_TPC_3_0 = 53, /* internal */ + GAUDI_QUEUE_ID_TPC_3_1 = 54, /* internal */ + GAUDI_QUEUE_ID_TPC_3_2 = 55, /* internal */ + GAUDI_QUEUE_ID_TPC_3_3 = 56, /* internal */ + GAUDI_QUEUE_ID_TPC_4_0 = 57, /* internal */ + GAUDI_QUEUE_ID_TPC_4_1 = 58, /* internal */ + GAUDI_QUEUE_ID_TPC_4_2 = 59, /* internal */ + GAUDI_QUEUE_ID_TPC_4_3 = 60, /* internal */ + GAUDI_QUEUE_ID_TPC_5_0 = 61, /* internal */ + GAUDI_QUEUE_ID_TPC_5_1 = 62, /* internal */ + GAUDI_QUEUE_ID_TPC_5_2 = 63, /* internal */ + GAUDI_QUEUE_ID_TPC_5_3 = 64, /* internal */ + GAUDI_QUEUE_ID_TPC_6_0 = 65, /* internal */ + GAUDI_QUEUE_ID_TPC_6_1 = 66, /* internal */ + GAUDI_QUEUE_ID_TPC_6_2 = 67, /* internal */ + GAUDI_QUEUE_ID_TPC_6_3 = 68, /* internal */ + GAUDI_QUEUE_ID_TPC_7_0 = 69, /* internal */ + GAUDI_QUEUE_ID_TPC_7_1 = 70, /* internal */ + GAUDI_QUEUE_ID_TPC_7_2 = 71, /* internal */ + GAUDI_QUEUE_ID_TPC_7_3 = 72, /* internal */ + GAUDI_QUEUE_ID_NIC_0_0 = 73, /* internal */ + GAUDI_QUEUE_ID_NIC_0_1 = 74, /* internal */ + GAUDI_QUEUE_ID_NIC_0_2 = 75, /* internal */ + GAUDI_QUEUE_ID_NIC_0_3 = 76, /* internal */ + GAUDI_QUEUE_ID_NIC_1_0 = 77, /* internal */ + GAUDI_QUEUE_ID_NIC_1_1 = 78, /* internal */ + GAUDI_QUEUE_ID_NIC_1_2 = 79, /* internal */ + GAUDI_QUEUE_ID_NIC_1_3 = 80, /* internal */ + GAUDI_QUEUE_ID_NIC_2_0 = 81, /* internal */ + GAUDI_QUEUE_ID_NIC_2_1 = 82, /* internal */ + GAUDI_QUEUE_ID_NIC_2_2 = 83, /* internal */ + GAUDI_QUEUE_ID_NIC_2_3 = 84, /* internal */ + GAUDI_QUEUE_ID_NIC_3_0 = 85, /* internal */ + GAUDI_QUEUE_ID_NIC_3_1 = 86, /* internal */ + GAUDI_QUEUE_ID_NIC_3_2 = 87, /* internal */ + GAUDI_QUEUE_ID_NIC_3_3 = 88, /* internal */ + GAUDI_QUEUE_ID_NIC_4_0 = 89, /* internal */ + GAUDI_QUEUE_ID_NIC_4_1 = 90, /* internal */ + GAUDI_QUEUE_ID_NIC_4_2 = 91, /* internal */ + GAUDI_QUEUE_ID_NIC_4_3 = 92, /* internal */ + GAUDI_QUEUE_ID_NIC_5_0 = 93, /* internal */ + GAUDI_QUEUE_ID_NIC_5_1 = 94, /* internal */ + GAUDI_QUEUE_ID_NIC_5_2 = 95, /* internal */ + GAUDI_QUEUE_ID_NIC_5_3 = 96, /* internal */ + GAUDI_QUEUE_ID_NIC_6_0 = 97, /* internal */ + GAUDI_QUEUE_ID_NIC_6_1 = 98, /* internal */ + GAUDI_QUEUE_ID_NIC_6_2 = 99, /* internal */ + GAUDI_QUEUE_ID_NIC_6_3 = 100, /* internal */ + GAUDI_QUEUE_ID_NIC_7_0 = 101, /* internal */ + GAUDI_QUEUE_ID_NIC_7_1 = 102, /* internal */ + GAUDI_QUEUE_ID_NIC_7_2 = 103, /* internal */ + GAUDI_QUEUE_ID_NIC_7_3 = 104, /* internal */ + GAUDI_QUEUE_ID_NIC_8_0 = 105, /* internal */ + GAUDI_QUEUE_ID_NIC_8_1 = 106, /* internal */ + GAUDI_QUEUE_ID_NIC_8_2 = 107, /* internal */ + GAUDI_QUEUE_ID_NIC_8_3 = 108, /* internal */ + GAUDI_QUEUE_ID_NIC_9_0 = 109, /* internal */ + GAUDI_QUEUE_ID_NIC_9_1 = 110, /* internal */ + GAUDI_QUEUE_ID_NIC_9_2 = 111, /* internal */ + GAUDI_QUEUE_ID_NIC_9_3 = 112, /* internal */ + GAUDI_QUEUE_ID_SIZE +}; + +/* + * In GAUDI2 we have two modes of operation in regard to queues: + * 1. Legacy mode, where each QMAN exposes 4 streams to the user + * 2. F/W mode, where we use F/W to schedule the JOBS to the different queues. + * + * When in legacy mode, the user sends the queue id per JOB according to + * enum gaudi2_queue_id below. + * + * When in F/W mode, the user sends a stream id per Command Submission. The + * stream id is a running number from 0 up to (N-1), where N is the number + * of streams the F/W exposes and is passed to the user in + * struct hl_info_hw_ip_info + */ + +enum gaudi2_queue_id { + GAUDI2_QUEUE_ID_PDMA_0_0 = 0, + GAUDI2_QUEUE_ID_PDMA_0_1 = 1, + GAUDI2_QUEUE_ID_PDMA_0_2 = 2, + GAUDI2_QUEUE_ID_PDMA_0_3 = 3, + GAUDI2_QUEUE_ID_PDMA_1_0 = 4, + GAUDI2_QUEUE_ID_PDMA_1_1 = 5, + GAUDI2_QUEUE_ID_PDMA_1_2 = 6, + GAUDI2_QUEUE_ID_PDMA_1_3 = 7, + GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0 = 8, + GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1 = 9, + GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2 = 10, + GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3 = 11, + GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0 = 12, + GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1 = 13, + GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2 = 14, + GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3 = 15, + GAUDI2_QUEUE_ID_DCORE0_MME_0_0 = 16, + GAUDI2_QUEUE_ID_DCORE0_MME_0_1 = 17, + GAUDI2_QUEUE_ID_DCORE0_MME_0_2 = 18, + GAUDI2_QUEUE_ID_DCORE0_MME_0_3 = 19, + GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 = 20, + GAUDI2_QUEUE_ID_DCORE0_TPC_0_1 = 21, + GAUDI2_QUEUE_ID_DCORE0_TPC_0_2 = 22, + GAUDI2_QUEUE_ID_DCORE0_TPC_0_3 = 23, + GAUDI2_QUEUE_ID_DCORE0_TPC_1_0 = 24, + GAUDI2_QUEUE_ID_DCORE0_TPC_1_1 = 25, + GAUDI2_QUEUE_ID_DCORE0_TPC_1_2 = 26, + GAUDI2_QUEUE_ID_DCORE0_TPC_1_3 = 27, + GAUDI2_QUEUE_ID_DCORE0_TPC_2_0 = 28, + GAUDI2_QUEUE_ID_DCORE0_TPC_2_1 = 29, + GAUDI2_QUEUE_ID_DCORE0_TPC_2_2 = 30, + GAUDI2_QUEUE_ID_DCORE0_TPC_2_3 = 31, + GAUDI2_QUEUE_ID_DCORE0_TPC_3_0 = 32, + GAUDI2_QUEUE_ID_DCORE0_TPC_3_1 = 33, + GAUDI2_QUEUE_ID_DCORE0_TPC_3_2 = 34, + GAUDI2_QUEUE_ID_DCORE0_TPC_3_3 = 35, + GAUDI2_QUEUE_ID_DCORE0_TPC_4_0 = 36, + GAUDI2_QUEUE_ID_DCORE0_TPC_4_1 = 37, + GAUDI2_QUEUE_ID_DCORE0_TPC_4_2 = 38, + GAUDI2_QUEUE_ID_DCORE0_TPC_4_3 = 39, + GAUDI2_QUEUE_ID_DCORE0_TPC_5_0 = 40, + GAUDI2_QUEUE_ID_DCORE0_TPC_5_1 = 41, + GAUDI2_QUEUE_ID_DCORE0_TPC_5_2 = 42, + GAUDI2_QUEUE_ID_DCORE0_TPC_5_3 = 43, + GAUDI2_QUEUE_ID_DCORE0_TPC_6_0 = 44, + GAUDI2_QUEUE_ID_DCORE0_TPC_6_1 = 45, + GAUDI2_QUEUE_ID_DCORE0_TPC_6_2 = 46, + GAUDI2_QUEUE_ID_DCORE0_TPC_6_3 = 47, + GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0 = 48, + GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1 = 49, + GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2 = 50, + GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3 = 51, + GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0 = 52, + GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1 = 53, + GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2 = 54, + GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3 = 55, + GAUDI2_QUEUE_ID_DCORE1_MME_0_0 = 56, + GAUDI2_QUEUE_ID_DCORE1_MME_0_1 = 57, + GAUDI2_QUEUE_ID_DCORE1_MME_0_2 = 58, + GAUDI2_QUEUE_ID_DCORE1_MME_0_3 = 59, + GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 = 60, + GAUDI2_QUEUE_ID_DCORE1_TPC_0_1 = 61, + GAUDI2_QUEUE_ID_DCORE1_TPC_0_2 = 62, + GAUDI2_QUEUE_ID_DCORE1_TPC_0_3 = 63, + GAUDI2_QUEUE_ID_DCORE1_TPC_1_0 = 64, + GAUDI2_QUEUE_ID_DCORE1_TPC_1_1 = 65, + GAUDI2_QUEUE_ID_DCORE1_TPC_1_2 = 66, + GAUDI2_QUEUE_ID_DCORE1_TPC_1_3 = 67, + GAUDI2_QUEUE_ID_DCORE1_TPC_2_0 = 68, + GAUDI2_QUEUE_ID_DCORE1_TPC_2_1 = 69, + GAUDI2_QUEUE_ID_DCORE1_TPC_2_2 = 70, + GAUDI2_QUEUE_ID_DCORE1_TPC_2_3 = 71, + GAUDI2_QUEUE_ID_DCORE1_TPC_3_0 = 72, + GAUDI2_QUEUE_ID_DCORE1_TPC_3_1 = 73, + GAUDI2_QUEUE_ID_DCORE1_TPC_3_2 = 74, + GAUDI2_QUEUE_ID_DCORE1_TPC_3_3 = 75, + GAUDI2_QUEUE_ID_DCORE1_TPC_4_0 = 76, + GAUDI2_QUEUE_ID_DCORE1_TPC_4_1 = 77, + GAUDI2_QUEUE_ID_DCORE1_TPC_4_2 = 78, + GAUDI2_QUEUE_ID_DCORE1_TPC_4_3 = 79, + GAUDI2_QUEUE_ID_DCORE1_TPC_5_0 = 80, + GAUDI2_QUEUE_ID_DCORE1_TPC_5_1 = 81, + GAUDI2_QUEUE_ID_DCORE1_TPC_5_2 = 82, + GAUDI2_QUEUE_ID_DCORE1_TPC_5_3 = 83, + GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0 = 84, + GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1 = 85, + GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2 = 86, + GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3 = 87, + GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0 = 88, + GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1 = 89, + GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2 = 90, + GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3 = 91, + GAUDI2_QUEUE_ID_DCORE2_MME_0_0 = 92, + GAUDI2_QUEUE_ID_DCORE2_MME_0_1 = 93, + GAUDI2_QUEUE_ID_DCORE2_MME_0_2 = 94, + GAUDI2_QUEUE_ID_DCORE2_MME_0_3 = 95, + GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 = 96, + GAUDI2_QUEUE_ID_DCORE2_TPC_0_1 = 97, + GAUDI2_QUEUE_ID_DCORE2_TPC_0_2 = 98, + GAUDI2_QUEUE_ID_DCORE2_TPC_0_3 = 99, + GAUDI2_QUEUE_ID_DCORE2_TPC_1_0 = 100, + GAUDI2_QUEUE_ID_DCORE2_TPC_1_1 = 101, + GAUDI2_QUEUE_ID_DCORE2_TPC_1_2 = 102, + GAUDI2_QUEUE_ID_DCORE2_TPC_1_3 = 103, + GAUDI2_QUEUE_ID_DCORE2_TPC_2_0 = 104, + GAUDI2_QUEUE_ID_DCORE2_TPC_2_1 = 105, + GAUDI2_QUEUE_ID_DCORE2_TPC_2_2 = 106, + GAUDI2_QUEUE_ID_DCORE2_TPC_2_3 = 107, + GAUDI2_QUEUE_ID_DCORE2_TPC_3_0 = 108, + GAUDI2_QUEUE_ID_DCORE2_TPC_3_1 = 109, + GAUDI2_QUEUE_ID_DCORE2_TPC_3_2 = 110, + GAUDI2_QUEUE_ID_DCORE2_TPC_3_3 = 111, + GAUDI2_QUEUE_ID_DCORE2_TPC_4_0 = 112, + GAUDI2_QUEUE_ID_DCORE2_TPC_4_1 = 113, + GAUDI2_QUEUE_ID_DCORE2_TPC_4_2 = 114, + GAUDI2_QUEUE_ID_DCORE2_TPC_4_3 = 115, + GAUDI2_QUEUE_ID_DCORE2_TPC_5_0 = 116, + GAUDI2_QUEUE_ID_DCORE2_TPC_5_1 = 117, + GAUDI2_QUEUE_ID_DCORE2_TPC_5_2 = 118, + GAUDI2_QUEUE_ID_DCORE2_TPC_5_3 = 119, + GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0 = 120, + GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1 = 121, + GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2 = 122, + GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3 = 123, + GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0 = 124, + GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1 = 125, + GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2 = 126, + GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3 = 127, + GAUDI2_QUEUE_ID_DCORE3_MME_0_0 = 128, + GAUDI2_QUEUE_ID_DCORE3_MME_0_1 = 129, + GAUDI2_QUEUE_ID_DCORE3_MME_0_2 = 130, + GAUDI2_QUEUE_ID_DCORE3_MME_0_3 = 131, + GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 = 132, + GAUDI2_QUEUE_ID_DCORE3_TPC_0_1 = 133, + GAUDI2_QUEUE_ID_DCORE3_TPC_0_2 = 134, + GAUDI2_QUEUE_ID_DCORE3_TPC_0_3 = 135, + GAUDI2_QUEUE_ID_DCORE3_TPC_1_0 = 136, + GAUDI2_QUEUE_ID_DCORE3_TPC_1_1 = 137, + GAUDI2_QUEUE_ID_DCORE3_TPC_1_2 = 138, + GAUDI2_QUEUE_ID_DCORE3_TPC_1_3 = 139, + GAUDI2_QUEUE_ID_DCORE3_TPC_2_0 = 140, + GAUDI2_QUEUE_ID_DCORE3_TPC_2_1 = 141, + GAUDI2_QUEUE_ID_DCORE3_TPC_2_2 = 142, + GAUDI2_QUEUE_ID_DCORE3_TPC_2_3 = 143, + GAUDI2_QUEUE_ID_DCORE3_TPC_3_0 = 144, + GAUDI2_QUEUE_ID_DCORE3_TPC_3_1 = 145, + GAUDI2_QUEUE_ID_DCORE3_TPC_3_2 = 146, + GAUDI2_QUEUE_ID_DCORE3_TPC_3_3 = 147, + GAUDI2_QUEUE_ID_DCORE3_TPC_4_0 = 148, + GAUDI2_QUEUE_ID_DCORE3_TPC_4_1 = 149, + GAUDI2_QUEUE_ID_DCORE3_TPC_4_2 = 150, + GAUDI2_QUEUE_ID_DCORE3_TPC_4_3 = 151, + GAUDI2_QUEUE_ID_DCORE3_TPC_5_0 = 152, + GAUDI2_QUEUE_ID_DCORE3_TPC_5_1 = 153, + GAUDI2_QUEUE_ID_DCORE3_TPC_5_2 = 154, + GAUDI2_QUEUE_ID_DCORE3_TPC_5_3 = 155, + GAUDI2_QUEUE_ID_NIC_0_0 = 156, + GAUDI2_QUEUE_ID_NIC_0_1 = 157, + GAUDI2_QUEUE_ID_NIC_0_2 = 158, + GAUDI2_QUEUE_ID_NIC_0_3 = 159, + GAUDI2_QUEUE_ID_NIC_1_0 = 160, + GAUDI2_QUEUE_ID_NIC_1_1 = 161, + GAUDI2_QUEUE_ID_NIC_1_2 = 162, + GAUDI2_QUEUE_ID_NIC_1_3 = 163, + GAUDI2_QUEUE_ID_NIC_2_0 = 164, + GAUDI2_QUEUE_ID_NIC_2_1 = 165, + GAUDI2_QUEUE_ID_NIC_2_2 = 166, + GAUDI2_QUEUE_ID_NIC_2_3 = 167, + GAUDI2_QUEUE_ID_NIC_3_0 = 168, + GAUDI2_QUEUE_ID_NIC_3_1 = 169, + GAUDI2_QUEUE_ID_NIC_3_2 = 170, + GAUDI2_QUEUE_ID_NIC_3_3 = 171, + GAUDI2_QUEUE_ID_NIC_4_0 = 172, + GAUDI2_QUEUE_ID_NIC_4_1 = 173, + GAUDI2_QUEUE_ID_NIC_4_2 = 174, + GAUDI2_QUEUE_ID_NIC_4_3 = 175, + GAUDI2_QUEUE_ID_NIC_5_0 = 176, + GAUDI2_QUEUE_ID_NIC_5_1 = 177, + GAUDI2_QUEUE_ID_NIC_5_2 = 178, + GAUDI2_QUEUE_ID_NIC_5_3 = 179, + GAUDI2_QUEUE_ID_NIC_6_0 = 180, + GAUDI2_QUEUE_ID_NIC_6_1 = 181, + GAUDI2_QUEUE_ID_NIC_6_2 = 182, + GAUDI2_QUEUE_ID_NIC_6_3 = 183, + GAUDI2_QUEUE_ID_NIC_7_0 = 184, + GAUDI2_QUEUE_ID_NIC_7_1 = 185, + GAUDI2_QUEUE_ID_NIC_7_2 = 186, + GAUDI2_QUEUE_ID_NIC_7_3 = 187, + GAUDI2_QUEUE_ID_NIC_8_0 = 188, + GAUDI2_QUEUE_ID_NIC_8_1 = 189, + GAUDI2_QUEUE_ID_NIC_8_2 = 190, + GAUDI2_QUEUE_ID_NIC_8_3 = 191, + GAUDI2_QUEUE_ID_NIC_9_0 = 192, + GAUDI2_QUEUE_ID_NIC_9_1 = 193, + GAUDI2_QUEUE_ID_NIC_9_2 = 194, + GAUDI2_QUEUE_ID_NIC_9_3 = 195, + GAUDI2_QUEUE_ID_NIC_10_0 = 196, + GAUDI2_QUEUE_ID_NIC_10_1 = 197, + GAUDI2_QUEUE_ID_NIC_10_2 = 198, + GAUDI2_QUEUE_ID_NIC_10_3 = 199, + GAUDI2_QUEUE_ID_NIC_11_0 = 200, + GAUDI2_QUEUE_ID_NIC_11_1 = 201, + GAUDI2_QUEUE_ID_NIC_11_2 = 202, + GAUDI2_QUEUE_ID_NIC_11_3 = 203, + GAUDI2_QUEUE_ID_NIC_12_0 = 204, + GAUDI2_QUEUE_ID_NIC_12_1 = 205, + GAUDI2_QUEUE_ID_NIC_12_2 = 206, + GAUDI2_QUEUE_ID_NIC_12_3 = 207, + GAUDI2_QUEUE_ID_NIC_13_0 = 208, + GAUDI2_QUEUE_ID_NIC_13_1 = 209, + GAUDI2_QUEUE_ID_NIC_13_2 = 210, + GAUDI2_QUEUE_ID_NIC_13_3 = 211, + GAUDI2_QUEUE_ID_NIC_14_0 = 212, + GAUDI2_QUEUE_ID_NIC_14_1 = 213, + GAUDI2_QUEUE_ID_NIC_14_2 = 214, + GAUDI2_QUEUE_ID_NIC_14_3 = 215, + GAUDI2_QUEUE_ID_NIC_15_0 = 216, + GAUDI2_QUEUE_ID_NIC_15_1 = 217, + GAUDI2_QUEUE_ID_NIC_15_2 = 218, + GAUDI2_QUEUE_ID_NIC_15_3 = 219, + GAUDI2_QUEUE_ID_NIC_16_0 = 220, + GAUDI2_QUEUE_ID_NIC_16_1 = 221, + GAUDI2_QUEUE_ID_NIC_16_2 = 222, + GAUDI2_QUEUE_ID_NIC_16_3 = 223, + GAUDI2_QUEUE_ID_NIC_17_0 = 224, + GAUDI2_QUEUE_ID_NIC_17_1 = 225, + GAUDI2_QUEUE_ID_NIC_17_2 = 226, + GAUDI2_QUEUE_ID_NIC_17_3 = 227, + GAUDI2_QUEUE_ID_NIC_18_0 = 228, + GAUDI2_QUEUE_ID_NIC_18_1 = 229, + GAUDI2_QUEUE_ID_NIC_18_2 = 230, + GAUDI2_QUEUE_ID_NIC_18_3 = 231, + GAUDI2_QUEUE_ID_NIC_19_0 = 232, + GAUDI2_QUEUE_ID_NIC_19_1 = 233, + GAUDI2_QUEUE_ID_NIC_19_2 = 234, + GAUDI2_QUEUE_ID_NIC_19_3 = 235, + GAUDI2_QUEUE_ID_NIC_20_0 = 236, + GAUDI2_QUEUE_ID_NIC_20_1 = 237, + GAUDI2_QUEUE_ID_NIC_20_2 = 238, + GAUDI2_QUEUE_ID_NIC_20_3 = 239, + GAUDI2_QUEUE_ID_NIC_21_0 = 240, + GAUDI2_QUEUE_ID_NIC_21_1 = 241, + GAUDI2_QUEUE_ID_NIC_21_2 = 242, + GAUDI2_QUEUE_ID_NIC_21_3 = 243, + GAUDI2_QUEUE_ID_NIC_22_0 = 244, + GAUDI2_QUEUE_ID_NIC_22_1 = 245, + GAUDI2_QUEUE_ID_NIC_22_2 = 246, + GAUDI2_QUEUE_ID_NIC_22_3 = 247, + GAUDI2_QUEUE_ID_NIC_23_0 = 248, + GAUDI2_QUEUE_ID_NIC_23_1 = 249, + GAUDI2_QUEUE_ID_NIC_23_2 = 250, + GAUDI2_QUEUE_ID_NIC_23_3 = 251, + GAUDI2_QUEUE_ID_ROT_0_0 = 252, + GAUDI2_QUEUE_ID_ROT_0_1 = 253, + GAUDI2_QUEUE_ID_ROT_0_2 = 254, + GAUDI2_QUEUE_ID_ROT_0_3 = 255, + GAUDI2_QUEUE_ID_ROT_1_0 = 256, + GAUDI2_QUEUE_ID_ROT_1_1 = 257, + GAUDI2_QUEUE_ID_ROT_1_2 = 258, + GAUDI2_QUEUE_ID_ROT_1_3 = 259, + GAUDI2_QUEUE_ID_CPU_PQ = 260, + GAUDI2_QUEUE_ID_SIZE +}; + +/* + * Engine Numbering + * + * Used in the "busy_engines_mask" field in `struct hl_info_hw_idle' + */ + +enum goya_engine_id { + GOYA_ENGINE_ID_DMA_0 = 0, + GOYA_ENGINE_ID_DMA_1, + GOYA_ENGINE_ID_DMA_2, + GOYA_ENGINE_ID_DMA_3, + GOYA_ENGINE_ID_DMA_4, + GOYA_ENGINE_ID_MME_0, + GOYA_ENGINE_ID_TPC_0, + GOYA_ENGINE_ID_TPC_1, + GOYA_ENGINE_ID_TPC_2, + GOYA_ENGINE_ID_TPC_3, + GOYA_ENGINE_ID_TPC_4, + GOYA_ENGINE_ID_TPC_5, + GOYA_ENGINE_ID_TPC_6, + GOYA_ENGINE_ID_TPC_7, + GOYA_ENGINE_ID_SIZE +}; + +enum gaudi_engine_id { + GAUDI_ENGINE_ID_DMA_0 = 0, + GAUDI_ENGINE_ID_DMA_1, + GAUDI_ENGINE_ID_DMA_2, + GAUDI_ENGINE_ID_DMA_3, + GAUDI_ENGINE_ID_DMA_4, + GAUDI_ENGINE_ID_DMA_5, + GAUDI_ENGINE_ID_DMA_6, + GAUDI_ENGINE_ID_DMA_7, + GAUDI_ENGINE_ID_MME_0, + GAUDI_ENGINE_ID_MME_1, + GAUDI_ENGINE_ID_MME_2, + GAUDI_ENGINE_ID_MME_3, + GAUDI_ENGINE_ID_TPC_0, + GAUDI_ENGINE_ID_TPC_1, + GAUDI_ENGINE_ID_TPC_2, + GAUDI_ENGINE_ID_TPC_3, + GAUDI_ENGINE_ID_TPC_4, + GAUDI_ENGINE_ID_TPC_5, + GAUDI_ENGINE_ID_TPC_6, + GAUDI_ENGINE_ID_TPC_7, + GAUDI_ENGINE_ID_NIC_0, + GAUDI_ENGINE_ID_NIC_1, + GAUDI_ENGINE_ID_NIC_2, + GAUDI_ENGINE_ID_NIC_3, + GAUDI_ENGINE_ID_NIC_4, + GAUDI_ENGINE_ID_NIC_5, + GAUDI_ENGINE_ID_NIC_6, + GAUDI_ENGINE_ID_NIC_7, + GAUDI_ENGINE_ID_NIC_8, + GAUDI_ENGINE_ID_NIC_9, + GAUDI_ENGINE_ID_SIZE +}; + +enum gaudi2_engine_id { + GAUDI2_DCORE0_ENGINE_ID_EDMA_0 = 0, + GAUDI2_DCORE0_ENGINE_ID_EDMA_1, + GAUDI2_DCORE0_ENGINE_ID_MME, + GAUDI2_DCORE0_ENGINE_ID_TPC_0, + GAUDI2_DCORE0_ENGINE_ID_TPC_1, + GAUDI2_DCORE0_ENGINE_ID_TPC_2, + GAUDI2_DCORE0_ENGINE_ID_TPC_3, + GAUDI2_DCORE0_ENGINE_ID_TPC_4, + GAUDI2_DCORE0_ENGINE_ID_TPC_5, + GAUDI2_DCORE0_ENGINE_ID_DEC_0, + GAUDI2_DCORE0_ENGINE_ID_DEC_1, + GAUDI2_DCORE1_ENGINE_ID_EDMA_0, + GAUDI2_DCORE1_ENGINE_ID_EDMA_1, + GAUDI2_DCORE1_ENGINE_ID_MME, + GAUDI2_DCORE1_ENGINE_ID_TPC_0, + GAUDI2_DCORE1_ENGINE_ID_TPC_1, + GAUDI2_DCORE1_ENGINE_ID_TPC_2, + GAUDI2_DCORE1_ENGINE_ID_TPC_3, + GAUDI2_DCORE1_ENGINE_ID_TPC_4, + GAUDI2_DCORE1_ENGINE_ID_TPC_5, + GAUDI2_DCORE1_ENGINE_ID_DEC_0, + GAUDI2_DCORE1_ENGINE_ID_DEC_1, + GAUDI2_DCORE2_ENGINE_ID_EDMA_0, + GAUDI2_DCORE2_ENGINE_ID_EDMA_1, + GAUDI2_DCORE2_ENGINE_ID_MME, + GAUDI2_DCORE2_ENGINE_ID_TPC_0, + GAUDI2_DCORE2_ENGINE_ID_TPC_1, + GAUDI2_DCORE2_ENGINE_ID_TPC_2, + GAUDI2_DCORE2_ENGINE_ID_TPC_3, + GAUDI2_DCORE2_ENGINE_ID_TPC_4, + GAUDI2_DCORE2_ENGINE_ID_TPC_5, + GAUDI2_DCORE2_ENGINE_ID_DEC_0, + GAUDI2_DCORE2_ENGINE_ID_DEC_1, + GAUDI2_DCORE3_ENGINE_ID_EDMA_0, + GAUDI2_DCORE3_ENGINE_ID_EDMA_1, + GAUDI2_DCORE3_ENGINE_ID_MME, + GAUDI2_DCORE3_ENGINE_ID_TPC_0, + GAUDI2_DCORE3_ENGINE_ID_TPC_1, + GAUDI2_DCORE3_ENGINE_ID_TPC_2, + GAUDI2_DCORE3_ENGINE_ID_TPC_3, + GAUDI2_DCORE3_ENGINE_ID_TPC_4, + GAUDI2_DCORE3_ENGINE_ID_TPC_5, + GAUDI2_DCORE3_ENGINE_ID_DEC_0, + GAUDI2_DCORE3_ENGINE_ID_DEC_1, + GAUDI2_DCORE0_ENGINE_ID_TPC_6, + GAUDI2_ENGINE_ID_PDMA_0, + GAUDI2_ENGINE_ID_PDMA_1, + GAUDI2_ENGINE_ID_ROT_0, + GAUDI2_ENGINE_ID_ROT_1, + GAUDI2_PCIE_ENGINE_ID_DEC_0, + GAUDI2_PCIE_ENGINE_ID_DEC_1, + GAUDI2_ENGINE_ID_NIC0_0, + GAUDI2_ENGINE_ID_NIC0_1, + GAUDI2_ENGINE_ID_NIC1_0, + GAUDI2_ENGINE_ID_NIC1_1, + GAUDI2_ENGINE_ID_NIC2_0, + GAUDI2_ENGINE_ID_NIC2_1, + GAUDI2_ENGINE_ID_NIC3_0, + GAUDI2_ENGINE_ID_NIC3_1, + GAUDI2_ENGINE_ID_NIC4_0, + GAUDI2_ENGINE_ID_NIC4_1, + GAUDI2_ENGINE_ID_NIC5_0, + GAUDI2_ENGINE_ID_NIC5_1, + GAUDI2_ENGINE_ID_NIC6_0, + GAUDI2_ENGINE_ID_NIC6_1, + GAUDI2_ENGINE_ID_NIC7_0, + GAUDI2_ENGINE_ID_NIC7_1, + GAUDI2_ENGINE_ID_NIC8_0, + GAUDI2_ENGINE_ID_NIC8_1, + GAUDI2_ENGINE_ID_NIC9_0, + GAUDI2_ENGINE_ID_NIC9_1, + GAUDI2_ENGINE_ID_NIC10_0, + GAUDI2_ENGINE_ID_NIC10_1, + GAUDI2_ENGINE_ID_NIC11_0, + GAUDI2_ENGINE_ID_NIC11_1, + GAUDI2_ENGINE_ID_PCIE, + GAUDI2_ENGINE_ID_PSOC, + GAUDI2_ENGINE_ID_ARC_FARM, + GAUDI2_ENGINE_ID_KDMA, + GAUDI2_ENGINE_ID_SIZE +}; + +/* + * ASIC specific PLL index + * + * Used to retrieve in frequency info of different IPs via HL_INFO_PLL_FREQUENCY + * under DRM_IOCTL_HL_INFO IOCTL. The enums need to be used as an index in + * struct hl_pll_frequency_info. + */ + +enum hl_goya_pll_index { + HL_GOYA_CPU_PLL = 0, + HL_GOYA_IC_PLL, + HL_GOYA_MC_PLL, + HL_GOYA_MME_PLL, + HL_GOYA_PCI_PLL, + HL_GOYA_EMMC_PLL, + HL_GOYA_TPC_PLL, + HL_GOYA_PLL_MAX +}; + +enum hl_gaudi_pll_index { + HL_GAUDI_CPU_PLL = 0, + HL_GAUDI_PCI_PLL, + HL_GAUDI_SRAM_PLL, + HL_GAUDI_HBM_PLL, + HL_GAUDI_NIC_PLL, + HL_GAUDI_DMA_PLL, + HL_GAUDI_MESH_PLL, + HL_GAUDI_MME_PLL, + HL_GAUDI_TPC_PLL, + HL_GAUDI_IF_PLL, + HL_GAUDI_PLL_MAX +}; + +enum hl_gaudi2_pll_index { + HL_GAUDI2_CPU_PLL = 0, + HL_GAUDI2_PCI_PLL, + HL_GAUDI2_SRAM_PLL, + HL_GAUDI2_HBM_PLL, + HL_GAUDI2_NIC_PLL, + HL_GAUDI2_DMA_PLL, + HL_GAUDI2_MESH_PLL, + HL_GAUDI2_MME_PLL, + HL_GAUDI2_TPC_PLL, + HL_GAUDI2_IF_PLL, + HL_GAUDI2_VID_PLL, + HL_GAUDI2_MSS_PLL, + HL_GAUDI2_PLL_MAX +}; + +/** + * enum hl_goya_dma_direction - Direction of DMA operation inside a LIN_DMA + * packet that is submitted to the GOYA's DMA QMAN. This attribute is not + * relevant to the H/W but the kernel driver use it to parse the packet's + * addresses and patch/validate them. + * @HL_DMA_HOST_TO_DRAM: DMA operation from Host memory to GOYA's DDR. + * @HL_DMA_HOST_TO_SRAM: DMA operation from Host memory to GOYA's SRAM. + * @HL_DMA_DRAM_TO_SRAM: DMA operation from GOYA's DDR to GOYA's SRAM. + * @HL_DMA_SRAM_TO_DRAM: DMA operation from GOYA's SRAM to GOYA's DDR. + * @HL_DMA_SRAM_TO_HOST: DMA operation from GOYA's SRAM to Host memory. + * @HL_DMA_DRAM_TO_HOST: DMA operation from GOYA's DDR to Host memory. + * @HL_DMA_DRAM_TO_DRAM: DMA operation from GOYA's DDR to GOYA's DDR. + * @HL_DMA_SRAM_TO_SRAM: DMA operation from GOYA's SRAM to GOYA's SRAM. + * @HL_DMA_ENUM_MAX: number of values in enum + */ +enum hl_goya_dma_direction { + HL_DMA_HOST_TO_DRAM, + HL_DMA_HOST_TO_SRAM, + HL_DMA_DRAM_TO_SRAM, + HL_DMA_SRAM_TO_DRAM, + HL_DMA_SRAM_TO_HOST, + HL_DMA_DRAM_TO_HOST, + HL_DMA_DRAM_TO_DRAM, + HL_DMA_SRAM_TO_SRAM, + HL_DMA_ENUM_MAX +}; + +/** + * enum hl_device_status - Device status information. + * @HL_DEVICE_STATUS_OPERATIONAL: Device is operational. + * @HL_DEVICE_STATUS_IN_RESET: Device is currently during reset. + * @HL_DEVICE_STATUS_MALFUNCTION: Device is unusable. + * @HL_DEVICE_STATUS_NEEDS_RESET: Device needs reset because auto reset was + * disabled. + * @HL_DEVICE_STATUS_IN_DEVICE_CREATION: Device is operational but its creation + * is still in progress. + * @HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE: Device is currently during + * reset that was triggered because the user released the device + * @HL_DEVICE_STATUS_LAST: Last status. + */ +enum hl_device_status { + HL_DEVICE_STATUS_OPERATIONAL, + HL_DEVICE_STATUS_IN_RESET, + HL_DEVICE_STATUS_MALFUNCTION, + HL_DEVICE_STATUS_NEEDS_RESET, + HL_DEVICE_STATUS_IN_DEVICE_CREATION, + HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE, + HL_DEVICE_STATUS_LAST = HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE +}; + +enum hl_server_type { + HL_SERVER_TYPE_UNKNOWN = 0, + HL_SERVER_GAUDI_HLS1 = 1, + HL_SERVER_GAUDI_HLS1H = 2, + HL_SERVER_GAUDI_TYPE1 = 3, + HL_SERVER_GAUDI_TYPE2 = 4, + HL_SERVER_GAUDI2_HLS2 = 5, + HL_SERVER_GAUDI2_TYPE1 = 7 +}; + +/* + * Notifier event values - for the notification mechanism and the + * HL_INFO_GET_EVENTS command + * + * HL_NOTIFIER_EVENT_TPC_ASSERT - Indicates TPC assert event + * HL_NOTIFIER_EVENT_UNDEFINED_OPCODE - Indicates undefined operation code + * HL_NOTIFIER_EVENT_DEVICE_RESET - Indicates device requires a reset + * HL_NOTIFIER_EVENT_CS_TIMEOUT - Indicates CS timeout error + * HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE - Indicates device is unavailable + * HL_NOTIFIER_EVENT_USER_ENGINE_ERR - Indicates device engine in error state + * HL_NOTIFIER_EVENT_GENERAL_HW_ERR - Indicates device HW error + * HL_NOTIFIER_EVENT_RAZWI - Indicates razwi happened + * HL_NOTIFIER_EVENT_PAGE_FAULT - Indicates page fault happened + * HL_NOTIFIER_EVENT_CRITICAL_HW_ERR - Indicates a HW error that requires SW + * abort and HW reset HL_NOTIFIER_EVENT_CRITICAL_FW_ERR - Indicates a FW + * error that requires SW abort and HW reset + */ +#define HL_NOTIFIER_EVENT_TPC_ASSERT (1ULL << 0) +#define HL_NOTIFIER_EVENT_UNDEFINED_OPCODE (1ULL << 1) +#define HL_NOTIFIER_EVENT_DEVICE_RESET (1ULL << 2) +#define HL_NOTIFIER_EVENT_CS_TIMEOUT (1ULL << 3) +#define HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE (1ULL << 4) +#define HL_NOTIFIER_EVENT_USER_ENGINE_ERR (1ULL << 5) +#define HL_NOTIFIER_EVENT_GENERAL_HW_ERR (1ULL << 6) +#define HL_NOTIFIER_EVENT_RAZWI (1ULL << 7) +#define HL_NOTIFIER_EVENT_PAGE_FAULT (1ULL << 8) +#define HL_NOTIFIER_EVENT_CRITICL_HW_ERR (1ULL << 9) +#define HL_NOTIFIER_EVENT_CRITICL_FW_ERR (1ULL << 10) + +/* Opcode for management ioctl + * + * HW_IP_INFO - Receive information about different IP blocks in the + * device. + * HL_INFO_HW_EVENTS - Receive an array describing how many times each event + * occurred since the last hard reset. + * HL_INFO_DRAM_USAGE - Retrieve the dram usage inside the device and of the + * specific context. This is relevant only for devices + * where the dram is managed by the kernel driver + * HL_INFO_HW_IDLE - Retrieve information about the idle status of each + * internal engine. + * HL_INFO_DEVICE_STATUS - Retrieve the device's status. This opcode doesn't + * require an open context. + * HL_INFO_DEVICE_UTILIZATION - Retrieve the total utilization of the device + * over the last period specified by the user. + * The period can be between 100ms to 1s, in + * resolution of 100ms. The return value is a + * percentage of the utilization rate. + * HL_INFO_HW_EVENTS_AGGREGATE - Receive an array describing how many times each + * event occurred since the driver was loaded. + * HL_INFO_CLK_RATE - Retrieve the current and maximum clock rate + * of the device in MHz. The maximum clock rate is + * configurable via sysfs parameter + * HL_INFO_RESET_COUNT - Retrieve the counts of the soft and hard reset + * operations performed on the device since the last + * time the driver was loaded. + * HL_INFO_TIME_SYNC - Retrieve the device's time alongside the host's time + * for synchronization. + * HL_INFO_CS_COUNTERS - Retrieve command submission counters + * HL_INFO_PCI_COUNTERS - Retrieve PCI counters + * HL_INFO_CLK_THROTTLE_REASON - Retrieve clock throttling reason + * HL_INFO_SYNC_MANAGER - Retrieve sync manager info per dcore + * HL_INFO_TOTAL_ENERGY - Retrieve total energy consumption + * HL_INFO_PLL_FREQUENCY - Retrieve PLL frequency + * HL_INFO_POWER - Retrieve power information + * HL_INFO_OPEN_STATS - Retrieve info regarding recent device open calls + * HL_INFO_DRAM_REPLACED_ROWS - Retrieve DRAM replaced rows info + * HL_INFO_DRAM_PENDING_ROWS - Retrieve DRAM pending rows num + * HL_INFO_LAST_ERR_OPEN_DEV_TIME - Retrieve timestamp of the last time the + * device was opened and CS timeout or razwi error occurred. + * HL_INFO_CS_TIMEOUT_EVENT - Retrieve CS timeout timestamp and its related CS + * sequence number. HL_INFO_RAZWI_EVENT - Retrieve parameters of razwi: + * Timestamp of razwi. + * The address which accessing it caused the razwi. + * Razwi initiator. + * Razwi cause, was it a page fault or MMU access + * error. May return 0 even though no new data is available, in that case + * timestamp will be 0. + * HL_INFO_DEV_MEM_ALLOC_PAGE_SIZES - Retrieve valid page sizes for device + * memory allocation HL_INFO_SECURED_ATTESTATION - Retrieve attestation report + * of the boot. HL_INFO_REGISTER_EVENTFD - Register eventfd for event + * notifications. HL_INFO_UNREGISTER_EVENTFD - Unregister eventfd + * HL_INFO_GET_EVENTS - Retrieve the last occurred events + * HL_INFO_UNDEFINED_OPCODE_EVENT - Retrieve last undefined opcode error + * information. May return 0 even though no new data is available, in that case + * timestamp will be 0. + * HL_INFO_ENGINE_STATUS - Retrieve the status of all the h/w engines in the + * asic. HL_INFO_PAGE_FAULT_EVENT - Retrieve parameters of captured page fault. + * May return 0 even though no new data is available, + * in that case timestamp will be 0. HL_INFO_USER_MAPPINGS - Retrieve user + * mappings, captured after page fault event. HL_INFO_FW_GENERIC_REQ - Send + * generic request to FW. HL_INFO_HW_ERR_EVENT - Retrieve information on the + * reported HW error. May return 0 even though no new data is available, in that + * case timestamp will be 0. HL_INFO_FW_ERR_EVENT - Retrieve information on + * the reported FW error. May return 0 even though no new data is available, in + * that case timestamp will be 0. HL_INFO_USER_ENGINE_ERR_EVENT - Retrieve the + * last engine id that reported an error. + */ +#define HL_INFO_HW_IP_INFO 0 +#define HL_INFO_HW_EVENTS 1 +#define HL_INFO_DRAM_USAGE 2 +#define HL_INFO_HW_IDLE 3 +#define HL_INFO_DEVICE_STATUS 4 +#define HL_INFO_DEVICE_UTILIZATION 6 +#define HL_INFO_HW_EVENTS_AGGREGATE 7 +#define HL_INFO_CLK_RATE 8 +#define HL_INFO_RESET_COUNT 9 +#define HL_INFO_TIME_SYNC 10 +#define HL_INFO_CS_COUNTERS 11 +#define HL_INFO_PCI_COUNTERS 12 +#define HL_INFO_CLK_THROTTLE_REASON 13 +#define HL_INFO_SYNC_MANAGER 14 +#define HL_INFO_TOTAL_ENERGY 15 +#define HL_INFO_PLL_FREQUENCY 16 +#define HL_INFO_POWER 17 +#define HL_INFO_OPEN_STATS 18 +#define HL_INFO_DRAM_REPLACED_ROWS 21 +#define HL_INFO_DRAM_PENDING_ROWS 22 +#define HL_INFO_LAST_ERR_OPEN_DEV_TIME 23 +#define HL_INFO_CS_TIMEOUT_EVENT 24 +#define HL_INFO_RAZWI_EVENT 25 +#define HL_INFO_DEV_MEM_ALLOC_PAGE_SIZES 26 +#define HL_INFO_SECURED_ATTESTATION 27 +#define HL_INFO_REGISTER_EVENTFD 28 +#define HL_INFO_UNREGISTER_EVENTFD 29 +#define HL_INFO_GET_EVENTS 30 +#define HL_INFO_UNDEFINED_OPCODE_EVENT 31 +#define HL_INFO_ENGINE_STATUS 32 +#define HL_INFO_PAGE_FAULT_EVENT 33 +#define HL_INFO_USER_MAPPINGS 34 +#define HL_INFO_FW_GENERIC_REQ 35 +#define HL_INFO_HW_ERR_EVENT 36 +#define HL_INFO_FW_ERR_EVENT 37 +#define HL_INFO_USER_ENGINE_ERR_EVENT 38 +#define HL_INFO_DEV_SIGNED 40 + +#define HL_INFO_VERSION_MAX_LEN 128 +#define HL_INFO_CARD_NAME_MAX_LEN 16 + +/* Maximum buffer size for retrieving engines status */ +#define HL_ENGINES_DATA_MAX_SIZE SZ_1M + +/** + * struct hl_info_hw_ip_info - hardware information on various IPs in the ASIC + * @sram_base_address: The first SRAM physical base address that is free to be + * used by the user. + * @dram_base_address: The first DRAM virtual or physical base address that is + * free to be used by the user. + * @dram_size: The DRAM size that is available to the user. + * @sram_size: The SRAM size that is available to the user. + * @num_of_events: The number of events that can be received from the f/w. This + * is needed so the user can what is the size of the h/w events + * array he needs to pass to the kernel when he wants to fetch + * the event counters. + * @device_id: PCI device ID of the ASIC. + * @module_id: Module ID of the ASIC for mezzanine cards in servers + * (From OCP spec). + * @decoder_enabled_mask: Bit-mask that represents which decoders are enabled. + * @first_available_interrupt_id: The first available interrupt ID for the user + * to be used when it works with user interrupts. + * Relevant for Gaudi2 and later. + * @server_type: Server type that the Gaudi ASIC is currently installed in. + * The value is according to enum hl_server_type + * @cpld_version: CPLD version on the board. + * @psoc_pci_pll_nr: PCI PLL NR value. Needed by the profiler in some ASICs. + * @psoc_pci_pll_nf: PCI PLL NF value. Needed by the profiler in some ASICs. + * @psoc_pci_pll_od: PCI PLL OD value. Needed by the profiler in some ASICs. + * @psoc_pci_pll_div_factor: PCI PLL DIV factor value. Needed by the profiler + * in some ASICs. + * @tpc_enabled_mask: Bit-mask that represents which TPCs are enabled. Relevant + * for Goya/Gaudi only. + * @dram_enabled: Whether the DRAM is enabled. + * @security_enabled: Whether security is enabled on device. + * @mme_master_slave_mode: Indicate whether the MME is working in master/slave + * configuration. Relevant for Gaudi2 and later. + * @cpucp_version: The CPUCP f/w version. + * @card_name: The card name as passed by the f/w. + * @tpc_enabled_mask_ext: Bit-mask that represents which TPCs are enabled. + * Relevant for Gaudi2 and later. + * @dram_page_size: The DRAM physical page size. + * @edma_enabled_mask: Bit-mask that represents which EDMAs are enabled. + * Relevant for Gaudi2 and later. + * @number_of_user_interrupts: The number of interrupts that are available to + * the userspace application to use. Relevant for Gaudi2 and later. + * @device_mem_alloc_default_page_size: default page size used in device memory + * allocation. + * @revision_id: PCI revision ID of the ASIC. + * @tpc_interrupt_id: interrupt id for TPC to use in order to raise events + * towards the host. + * @rotator_enabled_mask: Bit-mask that represents which rotators are enabled. + * Relevant for Gaudi3 and later. + * @engine_core_interrupt_reg_addr: interrupt register address for engine core + * to use in order to raise events toward FW. + * @reserved_dram_size: DRAM size reserved for driver and firmware. + */ +struct hl_info_hw_ip_info { + __u64 sram_base_address; + __u64 dram_base_address; + __u64 dram_size; + __u32 sram_size; + __u32 num_of_events; + __u32 device_id; + __u32 module_id; + __u32 decoder_enabled_mask; + __u16 first_available_interrupt_id; + __u16 server_type; + __u32 cpld_version; + __u32 psoc_pci_pll_nr; + __u32 psoc_pci_pll_nf; + __u32 psoc_pci_pll_od; + __u32 psoc_pci_pll_div_factor; + __u8 tpc_enabled_mask; + __u8 dram_enabled; + __u8 security_enabled; + __u8 mme_master_slave_mode; + __u8 cpucp_version[HL_INFO_VERSION_MAX_LEN]; + __u8 card_name[HL_INFO_CARD_NAME_MAX_LEN]; + __u64 tpc_enabled_mask_ext; + __u64 dram_page_size; + __u32 edma_enabled_mask; + __u16 number_of_user_interrupts; + __u8 reserved1; + __u8 reserved2; + __u64 reserved3; + __u64 device_mem_alloc_default_page_size; + __u64 reserved4; + __u64 reserved5; + __u32 reserved6; + __u8 reserved7; + __u8 revision_id; + __u16 tpc_interrupt_id; + __u32 rotator_enabled_mask; + __u32 reserved9; + __u64 engine_core_interrupt_reg_addr; + __u64 reserved_dram_size; +}; + +struct hl_info_dram_usage { + __u64 dram_free_mem; + __u64 ctx_dram_mem; +}; + +#define HL_BUSY_ENGINES_MASK_EXT_SIZE 4 + +struct hl_info_hw_idle { + __u32 is_idle; + /* + * Bitmask of busy engines. + * Bits definition is according to `enum <chip>_engine_id'. + */ + __u32 busy_engines_mask; + + /* + * Extended Bitmask of busy engines. + * Bits definition is according to `enum <chip>_engine_id'. + */ + __u64 busy_engines_mask_ext[HL_BUSY_ENGINES_MASK_EXT_SIZE]; +}; + +struct hl_info_device_status { + __u32 status; + __u32 pad; +}; + +struct hl_info_device_utilization { + __u32 utilization; + __u32 pad; +}; + +struct hl_info_clk_rate { + __u32 cur_clk_rate_mhz; + __u32 max_clk_rate_mhz; +}; + +struct hl_info_reset_count { + __u32 hard_reset_cnt; + __u32 soft_reset_cnt; +}; + +struct hl_info_time_sync { + __u64 device_time; + __u64 host_time; + __u64 tsc_time; +}; + +/** + * struct hl_info_pci_counters - pci counters + * @rx_throughput: PCI rx throughput KBps + * @tx_throughput: PCI tx throughput KBps + * @replay_cnt: PCI replay counter + */ +struct hl_info_pci_counters { + __u64 rx_throughput; + __u64 tx_throughput; + __u64 replay_cnt; +}; + +enum hl_clk_throttling_type { + HL_CLK_THROTTLE_TYPE_POWER, + HL_CLK_THROTTLE_TYPE_THERMAL, + HL_CLK_THROTTLE_TYPE_MAX +}; + +/* clk_throttling_reason masks */ +#define HL_CLK_THROTTLE_POWER (1 << HL_CLK_THROTTLE_TYPE_POWER) +#define HL_CLK_THROTTLE_THERMAL (1 << HL_CLK_THROTTLE_TYPE_THERMAL) + +/** + * struct hl_info_clk_throttle - clock throttling reason + * @clk_throttling_reason: each bit represents a clk throttling reason + * @clk_throttling_timestamp_us: represents CPU timestamp in microseconds of the + * start-event + * @clk_throttling_duration_ns: the clock throttle time in nanosec + */ +struct hl_info_clk_throttle { + __u32 clk_throttling_reason; + __u32 pad; + __u64 clk_throttling_timestamp_us[HL_CLK_THROTTLE_TYPE_MAX]; + __u64 clk_throttling_duration_ns[HL_CLK_THROTTLE_TYPE_MAX]; +}; + +/** + * struct hl_info_energy - device energy information + * @total_energy_consumption: total device energy consumption + */ +struct hl_info_energy { + __u64 total_energy_consumption; +}; + +#define HL_PLL_NUM_OUTPUTS 4 + +struct hl_pll_frequency_info { + __u16 output[HL_PLL_NUM_OUTPUTS]; +}; + +/** + * struct hl_open_stats_info - device open statistics information + * @open_counter: ever growing counter, increased on each successful dev open + * @last_open_period_ms: duration (ms) device was open last time + * @is_compute_ctx_active: Whether there is an active compute context executing + * @compute_ctx_in_release: true if the current compute context is being + * released + */ +struct hl_open_stats_info { + __u64 open_counter; + __u64 last_open_period_ms; + __u8 is_compute_ctx_active; + __u8 compute_ctx_in_release; + __u8 pad[6]; +}; + +/** + * struct hl_power_info - power information + * @power: power consumption + */ +struct hl_power_info { + __u64 power; +}; + +/** + * struct hl_info_sync_manager - sync manager information + * @first_available_sync_object: first available sob + * @first_available_monitor: first available monitor + * @first_available_cq: first available cq + */ +struct hl_info_sync_manager { + __u32 first_available_sync_object; + __u32 first_available_monitor; + __u32 first_available_cq; + __u32 reserved; +}; + +/** + * struct hl_info_cs_counters - command submission counters + * @total_out_of_mem_drop_cnt: total dropped due to memory allocation issue + * @ctx_out_of_mem_drop_cnt: context dropped due to memory allocation issue + * @total_parsing_drop_cnt: total dropped due to error in packet parsing + * @ctx_parsing_drop_cnt: context dropped due to error in packet parsing + * @total_queue_full_drop_cnt: total dropped due to queue full + * @ctx_queue_full_drop_cnt: context dropped due to queue full + * @total_device_in_reset_drop_cnt: total dropped due to device in reset + * @ctx_device_in_reset_drop_cnt: context dropped due to device in reset + * @total_max_cs_in_flight_drop_cnt: total dropped due to maximum CS in-flight + * @ctx_max_cs_in_flight_drop_cnt: context dropped due to maximum CS in-flight + * @total_validation_drop_cnt: total dropped due to validation error + * @ctx_validation_drop_cnt: context dropped due to validation error + */ +struct hl_info_cs_counters { + __u64 total_out_of_mem_drop_cnt; + __u64 ctx_out_of_mem_drop_cnt; + __u64 total_parsing_drop_cnt; + __u64 ctx_parsing_drop_cnt; + __u64 total_queue_full_drop_cnt; + __u64 ctx_queue_full_drop_cnt; + __u64 total_device_in_reset_drop_cnt; + __u64 ctx_device_in_reset_drop_cnt; + __u64 total_max_cs_in_flight_drop_cnt; + __u64 ctx_max_cs_in_flight_drop_cnt; + __u64 total_validation_drop_cnt; + __u64 ctx_validation_drop_cnt; +}; + +/** + * struct hl_info_last_err_open_dev_time - last error boot information. + * @timestamp: timestamp of last time the device was opened and error occurred. + */ +struct hl_info_last_err_open_dev_time { + __s64 timestamp; +}; + +/** + * struct hl_info_cs_timeout_event - last CS timeout information. + * @timestamp: timestamp when last CS timeout event occurred. + * @seq: sequence number of last CS timeout event. + */ +struct hl_info_cs_timeout_event { + __s64 timestamp; + __u64 seq; +}; + +#define HL_RAZWI_NA_ENG_ID U16_MAX +#define HL_RAZWI_MAX_NUM_OF_ENGINES_PER_RTR 128 +#define HL_RAZWI_READ BIT(0) +#define HL_RAZWI_WRITE BIT(1) +#define HL_RAZWI_LBW BIT(2) +#define HL_RAZWI_HBW BIT(3) +#define HL_RAZWI_RR BIT(4) +#define HL_RAZWI_ADDR_DEC BIT(5) + +/** + * struct hl_info_razwi_event - razwi information. + * @timestamp: timestamp of razwi. + * @addr: address which accessing it caused razwi. + * @engine_id: engine id of the razwi initiator, if it was initiated by engine + * that does not have engine id it will be set to HL_RAZWI_NA_ENG_ID. If there + * are several possible engines which caused the razwi, it will hold all of + * them. + * @num_of_possible_engines: contains number of possible engine ids. In some + * asics, razwi indication might be common for several engines and there is no + * way to get the exact engine. In this way, engine_id array will be filled with + * all possible engines caused this razwi. Also, there might be possibility in + * gaudi, where we don't indication on specific engine, in that case the value + * of this parameter will be zero. + * @flags: bitmask for additional data: HL_RAZWI_READ - razwi caused by read + * operation HL_RAZWI_WRITE - razwi caused by write operation HL_RAZWI_LBW - + * razwi caused by lbw fabric transaction HL_RAZWI_HBW - razwi caused by hbw + * fabric transaction HL_RAZWI_RR - razwi caused by range register + * HL_RAZWI_ADDR_DEC - razwi caused by + * address decode error Note: this data is not supported by all asics, in that + * case the relevant bits will not be set. + */ +struct hl_info_razwi_event { + __s64 timestamp; + __u64 addr; + __u16 engine_id[HL_RAZWI_MAX_NUM_OF_ENGINES_PER_RTR]; + __u16 num_of_possible_engines; + __u8 flags; + __u8 pad[5]; +}; + +#define MAX_QMAN_STREAMS_INFO 4 +#define OPCODE_INFO_MAX_ADDR_SIZE 8 +/** + * struct hl_info_undefined_opcode_event - info about last undefined opcode + * error + * @timestamp: timestamp of the undefined opcode error + * @cb_addr_streams: CB addresses (per stream) that are currently exists in the + * PQ entries. In case all streams array entries are filled with values, it + * means the execution was in Lower-CP. + * @cq_addr: the address of the current handled command buffer + * @cq_size: the size of the current handled command buffer + * @cb_addr_streams_len: num of streams - actual len of cb_addr_streams array. + * should be equal to 1 in case of undefined opcode + * in Upper-CP (specific stream) and equal to 4 incase + * of undefined opcode in Lower-CP. + * @engine_id: engine-id that the error occurred on + * @stream_id: the stream id the error occurred on. In case the stream equals to + * MAX_QMAN_STREAMS_INFO it means the error occurred on a Lower-CP. + */ +struct hl_info_undefined_opcode_event { + __s64 timestamp; + __u64 cb_addr_streams[MAX_QMAN_STREAMS_INFO][OPCODE_INFO_MAX_ADDR_SIZE]; + __u64 cq_addr; + __u32 cq_size; + __u32 cb_addr_streams_len; + __u32 engine_id; + __u32 stream_id; +}; + +/** + * struct hl_info_hw_err_event - info about HW error + * @timestamp: timestamp of error occurrence + * @event_id: The async event ID (specific to each device type). + * @pad: size padding for u64 granularity. + */ +struct hl_info_hw_err_event { + __s64 timestamp; + __u16 event_id; + __u16 pad[3]; +}; + +/* FW error definition for event_type in struct hl_info_fw_err_event */ +enum hl_info_fw_err_type { + HL_INFO_FW_HEARTBEAT_ERR, + HL_INFO_FW_REPORTED_ERR, +}; + +/** + * struct hl_info_fw_err_event - info about FW error + * @timestamp: time-stamp of error occurrence + * @err_type: The type of event as defined in hl_info_fw_err_type. + * @event_id: The async event ID (specific to each device type, applicable only + * when event type is HL_INFO_FW_REPORTED_ERR). + * @pad: size padding for u64 granularity. + */ +struct hl_info_fw_err_event { + __s64 timestamp; + __u16 err_type; + __u16 event_id; + __u32 pad; +}; + +/** + * struct hl_info_engine_err_event - engine error info + * @timestamp: time-stamp of error occurrence + * @engine_id: engine id who reported the error. + * @error_count: Amount of errors reported. + * @pad: size padding for u64 granularity. + */ +struct hl_info_engine_err_event { + __s64 timestamp; + __u16 engine_id; + __u16 error_count; + __u32 pad; +}; + +/** + * struct hl_info_dev_memalloc_page_sizes - valid page sizes in device mem alloc + * information. + * @page_order_bitmask: bitmap in which a set bit represents the order of the + * supported page size (e.g. 0x2100000 means that 1MB and 32MB pages are + * supported). + */ +struct hl_info_dev_memalloc_page_sizes { + __u64 page_order_bitmask; +}; + +#define SEC_PCR_DATA_BUF_SZ 256 +#define SEC_PCR_QUOTE_BUF_SZ 510 /* (512 - 2) 2 bytes used for size */ +#define SEC_SIGNATURE_BUF_SZ 255 /* (256 - 1) 1 byte used for size */ +#define SEC_PUB_DATA_BUF_SZ 510 /* (512 - 2) 2 bytes used for size */ +#define SEC_CERTIFICATE_BUF_SZ 2046 /* (2048 - 2) 2 bytes used for size */ +#define SEC_DEV_INFO_BUF_SZ 5120 + +/* + * struct hl_info_sec_attest - attestation report of the boot + * @nonce: number only used once. random number provided by host. this also + * passed to the quote command as a qualifying data. + * @pcr_quote_len: length of the attestation quote data (bytes) + * @pub_data_len: length of the public data (bytes) + * @certificate_len: length of the certificate (bytes) + * @pcr_num_reg: number of PCR registers in the pcr_data array + * @pcr_reg_len: length of each PCR register in the pcr_data array (bytes) + * @quote_sig_len: length of the attestation report signature (bytes) + * @pcr_data: raw values of the PCR registers + * @pcr_quote: attestation report data structure + * @quote_sig: signature structure of the attestation report + * @public_data: public key for the signed attestation + * (outPublic + name + qualifiedName) + * @certificate: certificate for the attestation signing key + */ +struct hl_info_sec_attest { + __u32 nonce; + __u16 pcr_quote_len; + __u16 pub_data_len; + __u16 certificate_len; + __u8 pcr_num_reg; + __u8 pcr_reg_len; + __u8 quote_sig_len; + __u8 pcr_data[SEC_PCR_DATA_BUF_SZ]; + __u8 pcr_quote[SEC_PCR_QUOTE_BUF_SZ]; + __u8 quote_sig[SEC_SIGNATURE_BUF_SZ]; + __u8 public_data[SEC_PUB_DATA_BUF_SZ]; + __u8 certificate[SEC_CERTIFICATE_BUF_SZ]; + __u8 pad0[2]; +}; + +/* + * struct hl_info_signed - device information signed by a secured device. + * @nonce: number only used once. random number provided by host. this also + * passed to the quote command as a qualifying data. + * @pub_data_len: length of the public data (bytes) + * @certificate_len: length of the certificate (bytes) + * @info_sig_len: length of the attestation signature (bytes) + * @public_data: public key info signed info data (outPublic + name + + * qualifiedName) + * @certificate: certificate for the signing key + * @info_sig: signature of the info + nonce data. + * @dev_info_len: length of device info (bytes) + * @dev_info: device info as byte array. + */ +struct hl_info_signed { + __u32 nonce; + __u16 pub_data_len; + __u16 certificate_len; + __u8 info_sig_len; + __u8 public_data[SEC_PUB_DATA_BUF_SZ]; + __u8 certificate[SEC_CERTIFICATE_BUF_SZ]; + __u8 info_sig[SEC_SIGNATURE_BUF_SZ]; + __u16 dev_info_len; + __u8 dev_info[SEC_DEV_INFO_BUF_SZ]; + __u8 pad[2]; +}; + +/** + * struct hl_page_fault_info - page fault information. + * @timestamp: timestamp of page fault. + * @addr: address which accessing it caused page fault. + * @engine_id: engine id which caused the page fault, supported only in gaudi3. + */ +struct hl_page_fault_info { + __s64 timestamp; + __u64 addr; + __u16 engine_id; + __u8 pad[6]; +}; + +/** + * struct hl_user_mapping - user mapping information. + * @dev_va: device virtual address. + * @size: virtual address mapping size. + */ +struct hl_user_mapping { + __u64 dev_va; + __u64 size; +}; + +enum gaudi_dcores { + HL_GAUDI_WS_DCORE, + HL_GAUDI_WN_DCORE, + HL_GAUDI_EN_DCORE, + HL_GAUDI_ES_DCORE +}; + +/** + * struct hl_info_args - Main structure to retrieve device related information. + * @return_pointer: User space address of the relevant structure related to + * HL_INFO_* operation mentioned in @op. + * @return_size: Size of the structure used in @return_pointer, just like "size" + * in "snprintf", it limits how many bytes the kernel can write. For hw_events + * array, the size should be hl_info_hw_ip_info.num_of_events * sizeof(__u32). + * @op: Defines which type of information to be retrieved. Refer HL_INFO_* for + * details. + * @dcore_id: DCORE id for which the information is relevant (for Gaudi refer to + * enum gaudi_dcores). + * @ctx_id: Context ID of the user. Currently not in use. + * @period_ms: Period value, in milliseconds, for utilization rate in range + * 100ms - 1000ms in 100 ms resolution. Currently not in use. + * @pll_index: Index as defined in hl_<asic type>_pll_index enumeration. + * @eventfd: event file descriptor for event notifications. + * @user_buffer_actual_size: Actual data size which was copied to user allocated + * buffer by the driver. It is possible for the user to allocate buffer larger + * than needed, hence updating this variable so user will know the exact amount + * of bytes copied by the kernel to the buffer. + * @sec_attest_nonce: Nonce number used for attestation report. + * @array_size: Number of array members copied to user buffer. + * Relevant for HL_INFO_USER_MAPPINGS info ioctl. + * @fw_sub_opcode: generic requests sub opcodes. + * @pad: Padding to 64 bit. + */ +struct hl_info_args { + __u64 return_pointer; + __u32 return_size; + __u32 op; + + union { + __u32 dcore_id; + __u32 ctx_id; + __u32 period_ms; + __u32 pll_index; + __u32 eventfd; + __u32 user_buffer_actual_size; + __u32 sec_attest_nonce; + __u32 array_size; + __u32 fw_sub_opcode; + }; + + __u32 pad; +}; + +/* Opcode to create a new command buffer */ +#define HL_CB_OP_CREATE 0 +/* Opcode to destroy previously created command buffer */ +#define HL_CB_OP_DESTROY 1 +/* Opcode to retrieve information about a command buffer */ +#define HL_CB_OP_INFO 2 + +/* 2MB minus 32 bytes for 2xMSG_PROT */ +#define HL_MAX_CB_SIZE (0x200000 - 32) + +/* Indicates whether the command buffer should be mapped to the device's MMU */ +#define HL_CB_FLAGS_MAP 0x1 + +/* Used with HL_CB_OP_INFO opcode to get the device va address for kernel mapped + * CB */ +#define HL_CB_FLAGS_GET_DEVICE_VA 0x2 + +struct hl_cb_in { + /* Handle of CB or 0 if we want to create one */ + __u64 cb_handle; + /* HL_CB_OP_* */ + __u32 op; + + /* Size of CB. Maximum size is HL_MAX_CB_SIZE. The minimum size that + * will be allocated, regardless of this parameter's value, is PAGE_SIZE + */ + __u32 cb_size; + + /* Context ID - Currently not in use */ + __u32 ctx_id; + /* HL_CB_FLAGS_* */ + __u32 flags; +}; + +struct hl_cb_out { + union { + /* Handle of CB */ + __u64 cb_handle; + + union { + /* Information about CB */ + struct { + /* Usage count of CB */ + __u32 usage_cnt; + __u32 pad; + }; + + /* CB mapped address to device MMU */ + __u64 device_va; + }; + }; +}; + +union hl_cb_args { + struct hl_cb_in in; + struct hl_cb_out out; +}; + +/* HL_CS_CHUNK_FLAGS_ values + * + * HL_CS_CHUNK_FLAGS_USER_ALLOC_CB: + * Indicates if the CB was allocated and mapped by userspace + * (relevant to Gaudi2 and later). User allocated CB is a command buffer, + * allocated by the user, via malloc (or similar). After allocating the + * CB, the user invokes - “memory ioctl” to map the user memory into a + * device virtual address. The user provides this address via the + * cb_handle field. The interface provides the ability to create a + * large CBs, Which aren’t limited to “HL_MAX_CB_SIZE”. Therefore, it + * increases the PCI-DMA queues throughput. This CB allocation method + * also reduces the use of Linux DMA-able memory pool. Which are limited + * and used by other Linux sub-systems. + */ +#define HL_CS_CHUNK_FLAGS_USER_ALLOC_CB 0x1 + +/* + * This structure size must always be fixed to 64-bytes for backward + * compatibility + */ +struct hl_cs_chunk { + union { + /* Goya/Gaudi: + * For external queue, this represents a Handle of CB on the + * Host. + * For internal queue in Goya, this represents an SRAM or + * a DRAM address of the internal CB. In Gaudi, this might also + * represent a mapped host address of the CB. + * + * Gaudi2 onwards: + * For H/W queue, this represents either a Handle of CB on the + * Host, or an SRAM, a DRAM, or a mapped host address of the CB. + * + * A mapped host address is in the device address space, after + * a host address was mapped by the device MMU. + */ + __u64 cb_handle; + + /* Relevant only when HL_CS_FLAGS_WAIT or + * HL_CS_FLAGS_COLLECTIVE_WAIT is set + * This holds address of array of u64 values that contain + * signal CS sequence numbers. The wait described by + * this job will listen on all those signals + * (wait event per signal) + */ + __u64 signal_seq_arr; + + /* + * Relevant only when HL_CS_FLAGS_WAIT or + * HL_CS_FLAGS_COLLECTIVE_WAIT is set + * along with HL_CS_FLAGS_ENCAP_SIGNALS. + * This is the CS sequence which has the encapsulated signals. + */ + __u64 encaps_signal_seq; + }; + + /* Index of queue to put the CB on */ + __u32 queue_index; + + union { + /* + * Size of command buffer with valid packets + * Can be smaller then actual CB size + */ + __u32 cb_size; + + /* Relevant only when HL_CS_FLAGS_WAIT or + * HL_CS_FLAGS_COLLECTIVE_WAIT is set. + * Number of entries in signal_seq_arr + */ + __u32 num_signal_seq_arr; + + /* Relevant only when HL_CS_FLAGS_WAIT or + * HL_CS_FLAGS_COLLECTIVE_WAIT is set along + * with HL_CS_FLAGS_ENCAP_SIGNALS + * This set the signals range that the user want to wait for + * out of the whole reserved signals range. + * e.g if the signals range is 20, and user don't want + * to wait for signal 8, so he set this offset to 7, then + * he call the API again with 9 and so on till 20. + */ + __u32 encaps_signal_offset; + }; + + /* HL_CS_CHUNK_FLAGS_* */ + __u32 cs_chunk_flags; + + /* Relevant only when HL_CS_FLAGS_COLLECTIVE_WAIT is set. + * This holds the collective engine ID. The wait described by this job + * will sync with this engine and with all NICs before completion. + */ + __u32 collective_engine_id; + + /* Align structure to 64 bytes */ + __u32 pad[10]; +}; + +/* SIGNAL/WAIT/COLLECTIVE_WAIT flags are mutually exclusive */ +#define HL_CS_FLAGS_FORCE_RESTORE 0x1 +#define HL_CS_FLAGS_SIGNAL 0x2 +#define HL_CS_FLAGS_WAIT 0x4 +#define HL_CS_FLAGS_COLLECTIVE_WAIT 0x8 + +#define HL_CS_FLAGS_TIMESTAMP 0x20 +#define HL_CS_FLAGS_STAGED_SUBMISSION 0x40 +#define HL_CS_FLAGS_STAGED_SUBMISSION_FIRST 0x80 +#define HL_CS_FLAGS_STAGED_SUBMISSION_LAST 0x100 +#define HL_CS_FLAGS_CUSTOM_TIMEOUT 0x200 +#define HL_CS_FLAGS_SKIP_RESET_ON_TIMEOUT 0x400 + +/* + * The encapsulated signals CS is merged into the existing CS ioctls. + * In order to use this feature need to follow the below procedure: + * 1. Reserve signals, set the CS type to HL_CS_FLAGS_RESERVE_SIGNALS_ONLY + * the output of this API will be the SOB offset from CFG_BASE. + * this address will be used to patch CB cmds to do the signaling for this + * SOB by incrementing it's value. + * for reverting the reservation use HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY + * CS type, note that this might fail if out-of-sync happened to the SOB + * value, in case other signaling request to the same SOB occurred between + * reserve-unreserve calls. + * 2. Use the staged CS to do the encapsulated signaling jobs. + * use HL_CS_FLAGS_STAGED_SUBMISSION and HL_CS_FLAGS_STAGED_SUBMISSION_FIRST + * along with HL_CS_FLAGS_ENCAP_SIGNALS flag, and set encaps_signal_offset + * field. This offset allows app to wait on part of the reserved signals. + * 3. Use WAIT/COLLECTIVE WAIT CS along with HL_CS_FLAGS_ENCAP_SIGNALS flag + * to wait for the encapsulated signals. + */ +#define HL_CS_FLAGS_ENCAP_SIGNALS 0x800 +#define HL_CS_FLAGS_RESERVE_SIGNALS_ONLY 0x1000 +#define HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY 0x2000 + +/* + * The engine cores CS is merged into the existing CS ioctls. + * Use it to control the engine cores mode. + */ +#define HL_CS_FLAGS_ENGINE_CORE_COMMAND 0x4000 + +/* + * The flush HBW PCI writes is merged into the existing CS ioctls. + * Used to flush all HBW PCI writes. + * This is a blocking operation and for this reason the user shall not use + * the return sequence number (which will be invalid anyway) + */ +#define HL_CS_FLAGS_FLUSH_PCI_HBW_WRITES 0x8000 + +/* + * The engines CS is merged into the existing CS ioctls. + * Use it to control engines modes. + */ +#define HL_CS_FLAGS_ENGINES_COMMAND 0x10000 + +#define HL_CS_STATUS_SUCCESS 0 + +#define HL_MAX_JOBS_PER_CS 512 + +/* + * enum hl_engine_command - engine command + * + * @HL_ENGINE_CORE_HALT: engine core halt + * @HL_ENGINE_CORE_RUN: engine core run + * @HL_ENGINE_STALL: user engine/s stall + * @HL_ENGINE_RESUME: user engine/s resume + */ +enum hl_engine_command { + HL_ENGINE_CORE_HALT = 1, + HL_ENGINE_CORE_RUN = 2, + HL_ENGINE_STALL = 3, + HL_ENGINE_RESUME = 4, + HL_ENGINE_COMMAND_MAX +}; + +struct hl_cs_in { + union { + struct { + /* this holds address of array of hl_cs_chunk for + * restore phase */ + __u64 chunks_restore; + + /* holds address of array of hl_cs_chunk for execution + * phase */ + __u64 chunks_execute; + }; + + /* Valid only when HL_CS_FLAGS_ENGINE_CORE_COMMAND is set */ + struct { + /* this holds address of array of uint32 for + * engine_cores */ + __u64 engine_cores; + + /* number of engine cores in engine_cores array */ + __u32 num_engine_cores; + + /* the core command to be sent towards engine cores */ + __u32 core_command; + }; + + /* Valid only when HL_CS_FLAGS_ENGINES_COMMAND is set */ + struct { + /* this holds address of array of uint32 for engines */ + __u64 engines; + + /* number of engines in engines array */ + __u32 num_engines; + + /* the engine command to be sent towards engines */ + __u32 engine_command; + }; + }; + + union { + /* + * Sequence number of a staged submission CS + * valid only if HL_CS_FLAGS_STAGED_SUBMISSION is set and + * HL_CS_FLAGS_STAGED_SUBMISSION_FIRST is unset. + */ + __u64 seq; + + /* + * Encapsulated signals handle id + * Valid for two flows: + * 1. CS with encapsulated signals: + * when HL_CS_FLAGS_STAGED_SUBMISSION and + * HL_CS_FLAGS_STAGED_SUBMISSION_FIRST + * and HL_CS_FLAGS_ENCAP_SIGNALS are set. + * 2. unreserve signals: + * valid when HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY is set. + */ + __u32 encaps_sig_handle_id; + + /* Valid only when HL_CS_FLAGS_RESERVE_SIGNALS_ONLY is set */ + struct { + /* Encapsulated signals number */ + __u32 encaps_signals_count; + + /* Encapsulated signals queue index (stream) */ + __u32 encaps_signals_q_idx; + }; + }; + + /* Number of chunks in restore phase array. Maximum number is + * HL_MAX_JOBS_PER_CS + */ + __u32 num_chunks_restore; + + /* Number of chunks in execution array. Maximum number is + * HL_MAX_JOBS_PER_CS + */ + __u32 num_chunks_execute; + + /* timeout in seconds - valid only if HL_CS_FLAGS_CUSTOM_TIMEOUT + * is set + */ + __u32 timeout; + + /* HL_CS_FLAGS_* */ + __u32 cs_flags; + + /* Context ID - Currently not in use */ + __u32 ctx_id; + __u8 pad[4]; +}; + +struct hl_cs_out { + union { + /* + * seq holds the sequence number of the CS to pass to wait + * ioctl. All values are valid except for 0 and ULLONG_MAX + */ + __u64 seq; + + /* Valid only when HL_CS_FLAGS_RESERVE_SIGNALS_ONLY is set */ + struct { + /* This is the reserved signal handle id */ + __u32 handle_id; + + /* This is the signals count */ + __u32 count; + }; + }; + + /* HL_CS_STATUS */ + __u32 status; + + /* + * SOB base address offset + * Valid only when HL_CS_FLAGS_RESERVE_SIGNALS_ONLY or + * HL_CS_FLAGS_SIGNAL is set + */ + __u32 sob_base_addr_offset; + + /* + * Count of completed signals in SOB before current signal submission. + * Valid only when (HL_CS_FLAGS_ENCAP_SIGNALS & + * HL_CS_FLAGS_STAGED_SUBMISSION) or HL_CS_FLAGS_SIGNAL is set + */ + __u16 sob_count_before_submission; + __u16 pad[3]; +}; + +union hl_cs_args { + struct hl_cs_in in; + struct hl_cs_out out; +}; + +#define HL_WAIT_CS_FLAGS_INTERRUPT 0x2 +#define HL_WAIT_CS_FLAGS_INTERRUPT_MASK 0xFFF00000 +#define HL_WAIT_CS_FLAGS_ANY_CQ_INTERRUPT 0xFFF00000 +#define HL_WAIT_CS_FLAGS_ANY_DEC_INTERRUPT 0xFFE00000 +#define HL_WAIT_CS_FLAGS_MULTI_CS 0x4 +#define HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ 0x10 +#define HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT 0x20 + +#define HL_WAIT_MULTI_CS_LIST_MAX_LEN 32 + +struct hl_wait_cs_in { + union { + struct { + /* + * In case of wait_cs holds the CS sequence number. + * In case of wait for multi CS hold a user pointer to + * an array of CS sequence numbers + */ + __u64 seq; + /* Absolute timeout to wait for command submission + * in microseconds + */ + __u64 timeout_us; + }; + + struct { + union { + /* User address for completion comparison. + * upon interrupt, driver will compare the value + * pointed by this address with the supplied + * target value. in order not to perform any + * comparison, set address to all 1s. Relevant + * only when HL_WAIT_CS_FLAGS_INTERRUPT is set + */ + __u64 addr; + + /* cq_counters_handle to a kernel mapped cb + * which contains cq counters. Relevant only + * when HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ is + * set + */ + __u64 cq_counters_handle; + }; + + /* Target value for completion comparison */ + __u64 target; + }; + }; + + /* Context ID - Currently not in use */ + __u32 ctx_id; + + /* HL_WAIT_CS_FLAGS_* + * If HL_WAIT_CS_FLAGS_INTERRUPT is set, this field should include + * interrupt id according to HL_WAIT_CS_FLAGS_INTERRUPT_MASK + * + * in order to wait for any CQ interrupt, set interrupt value to + * HL_WAIT_CS_FLAGS_ANY_CQ_INTERRUPT. + * + * in order to wait for any decoder interrupt, set interrupt value to + * HL_WAIT_CS_FLAGS_ANY_DEC_INTERRUPT. + */ + __u32 flags; + + union { + struct { + /* Multi CS API info- valid entries in multi-CS array */ + __u8 seq_arr_len; + __u8 pad[7]; + }; + + /* Absolute timeout to wait for an interrupt in microseconds. + * Relevant only when HL_WAIT_CS_FLAGS_INTERRUPT is set + */ + __u64 interrupt_timeout_us; + }; + + /* + * cq counter offset inside the counters cb pointed by + * cq_counters_handle above. upon interrupt, driver will compare the + * value pointed by this address (cq_counters_handle + + * cq_counters_offset) with the supplied target value. relevant only + * when HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ is set + */ + __u64 cq_counters_offset; + + /* + * Timestamp_handle timestamps buffer handle. + * relevant only when HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT is set + */ + __u64 timestamp_handle; + + /* + * Timestamp_offset is offset inside the timestamp buffer pointed by + * timestamp_handle above. upon interrupt, if the cq reached the target + * value then driver will write timestamp to this offset. relevant only + * when HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT is set + */ + __u64 timestamp_offset; +}; + +#define HL_WAIT_CS_STATUS_COMPLETED 0 +#define HL_WAIT_CS_STATUS_BUSY 1 +#define HL_WAIT_CS_STATUS_TIMEDOUT 2 +#define HL_WAIT_CS_STATUS_ABORTED 3 + +#define HL_WAIT_CS_STATUS_FLAG_GONE 0x1 +#define HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD 0x2 + +struct hl_wait_cs_out { + /* HL_WAIT_CS_STATUS_* */ + __u32 status; + /* HL_WAIT_CS_STATUS_FLAG* */ + __u32 flags; + /* + * valid only if HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD is set + * for wait_cs: timestamp of CS completion + * for wait_multi_cs: timestamp of FIRST CS completion + */ + __s64 timestamp_nsec; + /* multi CS completion bitmap */ + __u32 cs_completion_map; + __u32 pad; +}; + +union hl_wait_cs_args { + struct hl_wait_cs_in in; + struct hl_wait_cs_out out; +}; + +/* Opcode to allocate device memory */ +#define HL_MEM_OP_ALLOC 0 + +/* Opcode to free previously allocated device memory */ +#define HL_MEM_OP_FREE 1 + +/* Opcode to map host and device memory */ +#define HL_MEM_OP_MAP 2 + +/* Opcode to unmap previously mapped host and device memory */ +#define HL_MEM_OP_UNMAP 3 + +/* Opcode to map a hw block */ +#define HL_MEM_OP_MAP_BLOCK 4 + +/* Opcode to create DMA-BUF object for an existing device memory allocation + * and to export an FD of that DMA-BUF back to the caller + */ +#define HL_MEM_OP_EXPORT_DMABUF_FD 5 + +/* Opcode to create timestamps pool for user interrupts registration support + * The memory will be allocated by the kernel driver, A timestamp buffer which + * the user will get handle to it for mmap, and another internal buffer used by + * the driver for registration management The memory will be freed when the user + * closes the file descriptor(ctx close) + */ +#define HL_MEM_OP_TS_ALLOC 6 + +/* Memory flags */ +#define HL_MEM_CONTIGUOUS 0x1 +#define HL_MEM_SHARED 0x2 +#define HL_MEM_USERPTR 0x4 +#define HL_MEM_FORCE_HINT 0x8 +#define HL_MEM_PREFETCH 0x40 + +/** + * structure hl_mem_in - structure that handle input args for memory IOCTL + * @union arg: union of structures to be used based on the input operation + * @op: specify the requested memory operation (one of the HL_MEM_OP_* + * definitions). + * @flags: flags for the memory operation (one of the HL_MEM_* definitions). + * For the HL_MEM_OP_EXPORT_DMABUF_FD opcode, this field holds the + * DMA-BUF file/FD flags. + * @ctx_id: context ID - currently not in use. + * @num_of_elements: number of timestamp elements used only with + * HL_MEM_OP_TS_ALLOC opcode. + */ +struct hl_mem_in { + union { + /** + * structure for device memory allocation (used with the + * HL_MEM_OP_ALLOC op) + * @mem_size: memory size to allocate + * @page_size: page size to use on allocation. when the value is + * 0 the default page size will be taken. + */ + struct { + __u64 mem_size; + __u64 page_size; + } alloc; + + /** + * structure for free-ing device memory (used with the + * HL_MEM_OP_FREE op) + * @handle: handle returned from HL_MEM_OP_ALLOC + */ + struct { + __u64 handle; + } free; + + /** + * structure for mapping device memory (used with the + * HL_MEM_OP_MAP op) + * @hint_addr: requested virtual address of mapped memory. + * the driver will try to map the requested region + * to this hint address, as long as the address is valid and not + * already mapped. the user should check the returned address of + * the IOCTL to make sure he got the hint address. passing 0 + * here means that the driver will choose the address itself. + * @handle: handle returned from HL_MEM_OP_ALLOC. + */ + struct { + __u64 hint_addr; + __u64 handle; + } map_device; + + /** + * structure for mapping host memory (used with the + * HL_MEM_OP_MAP op) + * @host_virt_addr: address of allocated host memory. + * @hint_addr: requested virtual address of mapped memory. + * the driver will try to map the requested region + * to this hint address, as long as the address is valid and not + * already mapped. the user should check the returned address of + * the IOCTL to make sure he got the hint address. passing 0 + * here means that the driver will choose the address itself. + * @size: size of allocated host memory. + */ + struct { + __u64 host_virt_addr; + __u64 hint_addr; + __u64 mem_size; + } map_host; + + /** + * structure for mapping hw block (used with the + * HL_MEM_OP_MAP_BLOCK op) + * @block_addr:HW block address to map, a handle and size will + * be returned to the user and will be used to mmap the relevant + * block. only addresses from configuration space are allowed. + */ + struct { + __u64 block_addr; + } map_block; + + /** + * structure for unmapping host memory (used with the + * HL_MEM_OP_UNMAP op) + * @device_virt_addr: virtual address returned from + * HL_MEM_OP_MAP + */ + struct { + __u64 device_virt_addr; + } unmap; + + /** + * structure for exporting DMABUF object (used with + * the HL_MEM_OP_EXPORT_DMABUF_FD op) + * @addr: for Gaudi1, the driver expects a physical address + * inside the device's DRAM. this is because in Gaudi1 + * we don't have MMU that covers the device's DRAM. + * for all other ASICs, the driver expects a device + * virtual address that represents the start address of + * a mapped DRAM memory area inside the device. + * the address must be the same as was received from the + * driver during a previous HL_MEM_OP_MAP operation. + * @mem_size: size of memory to export. + * @offset: for Gaudi1, this value must be 0. For all other + * ASICs, the driver expects an offset inside of the memory area + * describe by addr. the offset represents the start + * address of that the exported dma-buf object + * describes. + */ + struct { + __u64 addr; + __u64 mem_size; + __u64 offset; + } export_dmabuf_fd; + }; + + __u32 op; + __u32 flags; + __u32 ctx_id; + __u32 num_of_elements; +}; + +struct hl_mem_out { + union { + /* + * Used for HL_MEM_OP_MAP as the virtual address that was + * assigned in the device VA space. + * A value of 0 means the requested operation failed. + */ + __u64 device_virt_addr; + + /* + * Used in HL_MEM_OP_ALLOC + * This is the assigned handle for the allocated memory + */ + __u64 handle; + + struct { + /* + * Used in HL_MEM_OP_MAP_BLOCK. + * This is the assigned handle for the mapped block + */ + __u64 block_handle; + + /* + * Used in HL_MEM_OP_MAP_BLOCK + * This is the size of the mapped block + */ + __u32 block_size; + + __u32 pad; + }; + + /* Returned in HL_MEM_OP_EXPORT_DMABUF_FD. Represents the + * DMA-BUF object that was created to describe a memory + * allocation on the device's memory space. The FD should be + * passed to the importer driver + */ + __s32 fd; + }; +}; + +union hl_mem_args { + struct hl_mem_in in; + struct hl_mem_out out; +}; + +#define HL_DEBUG_MAX_AUX_VALUES 10 + +struct hl_debug_params_etr { + /* Address in memory to allocate buffer */ + __u64 buffer_address; + + /* Size of buffer to allocate */ + __u64 buffer_size; + + /* Sink operation mode: SW fifo, HW fifo, Circular buffer */ + __u32 sink_mode; + __u32 pad; +}; + +struct hl_debug_params_etf { + /* Address in memory to allocate buffer */ + __u64 buffer_address; + + /* Size of buffer to allocate */ + __u64 buffer_size; + + /* Sink operation mode: SW fifo, HW fifo, Circular buffer */ + __u32 sink_mode; + __u32 pad; +}; + +struct hl_debug_params_stm { + /* Two bit masks for HW event and Stimulus Port */ + __u64 he_mask; + __u64 sp_mask; + + /* Trace source ID */ + __u32 id; + + /* Frequency for the timestamp register */ + __u32 frequency; +}; + +struct hl_debug_params_bmon { + /* Two address ranges that the user can request to filter */ + __u64 start_addr0; + __u64 addr_mask0; + + __u64 start_addr1; + __u64 addr_mask1; + + /* Capture window configuration */ + __u32 bw_win; + __u32 win_capture; + + /* Trace source ID */ + __u32 id; + + /* Control register */ + __u32 control; + + /* Two more address ranges that the user can request to filter */ + __u64 start_addr2; + __u64 end_addr2; + + __u64 start_addr3; + __u64 end_addr3; +}; + +struct hl_debug_params_spmu { + /* Event types selection */ + __u64 event_types[HL_DEBUG_MAX_AUX_VALUES]; + + /* Number of event types selection */ + __u32 event_types_num; + + /* TRC configuration register values */ + __u32 pmtrc_val; + __u32 trc_ctrl_host_val; + __u32 trc_en_host_val; +}; + +/* Opcode for ETR component */ +#define HL_DEBUG_OP_ETR 0 +/* Opcode for ETF component */ +#define HL_DEBUG_OP_ETF 1 +/* Opcode for STM component */ +#define HL_DEBUG_OP_STM 2 +/* Opcode for FUNNEL component */ +#define HL_DEBUG_OP_FUNNEL 3 +/* Opcode for BMON component */ +#define HL_DEBUG_OP_BMON 4 +/* Opcode for SPMU component */ +#define HL_DEBUG_OP_SPMU 5 +/* Opcode for timestamp (deprecated) */ +#define HL_DEBUG_OP_TIMESTAMP 6 +/* Opcode for setting the device into or out of debug mode. The enable + * variable should be 1 for enabling debug mode and 0 for disabling it + */ +#define HL_DEBUG_OP_SET_MODE 7 + +struct hl_debug_args { + /* + * Pointer to user input structure. + * This field is relevant to specific opcodes. + */ + __u64 input_ptr; + /* Pointer to user output structure */ + __u64 output_ptr; + /* Size of user input structure */ + __u32 input_size; + /* Size of user output structure */ + __u32 output_size; + /* HL_DEBUG_OP_* */ + __u32 op; + /* + * Register index in the component, taken from the debug_regs_index enum + * in the various ASIC header files + */ + __u32 reg_idx; + /* Enable/disable */ + __u32 enable; + /* Context ID - Currently not in use */ + __u32 ctx_id; +}; + +#define HL_IOCTL_INFO 0x00 +#define HL_IOCTL_CB 0x01 +#define HL_IOCTL_CS 0x02 +#define HL_IOCTL_WAIT_CS 0x03 +#define HL_IOCTL_MEMORY 0x04 +#define HL_IOCTL_DEBUG 0x05 + +/* + * Various information operations such as: + * - H/W IP information + * - Current dram usage + * + * The user calls this IOCTL with an opcode that describes the required + * information. The user should supply a pointer to a user-allocated memory + * chunk, which will be filled by the driver with the requested information. + * + * The user supplies the maximum amount of size to copy into the user's memory, + * in order to prevent data corruption in case of differences between the + * definitions of structures in kernel and userspace, e.g. in case of old + * userspace and new kernel driver + */ +#define DRM_IOCTL_HL_INFO \ + DRM_IOWR(DRM_COMMAND_BASE + HL_IOCTL_INFO, struct hl_info_args) + +/* + * Command Buffer + * - Request a Command Buffer + * - Destroy a Command Buffer + * + * The command buffers are memory blocks that reside in DMA-able address + * space and are physically contiguous so they can be accessed by the device + * directly. They are allocated using the coherent DMA API. + * + * When creating a new CB, the IOCTL returns a handle of it, and the user-space + * process needs to use that handle to mmap the buffer so it can access them. + * + * In some instances, the device must access the command buffer through the + * device's MMU, and thus its memory should be mapped. In these cases, user can + * indicate the driver that such a mapping is required. + * The resulting device virtual address will be used internally by the driver, + * and won't be returned to user. + * + */ +#define DRM_IOCTL_HL_CB \ + DRM_IOWR(DRM_COMMAND_BASE + HL_IOCTL_CB, union hl_cb_args) + +/* + * Command Submission + * + * To submit work to the device, the user need to call this IOCTL with a set + * of JOBS. That set of JOBS constitutes a CS object. + * Each JOB will be enqueued on a specific queue, according to the user's input. + * There can be more then one JOB per queue. + * + * The CS IOCTL will receive two sets of JOBS. One set is for "restore" phase + * and a second set is for "execution" phase. + * The JOBS on the "restore" phase are enqueued only after context-switch + * (or if its the first CS for this context). The user can also order the + * driver to run the "restore" phase explicitly + * + * Goya/Gaudi: + * There are two types of queues - external and internal. External queues + * are DMA queues which transfer data from/to the Host. All other queues are + * internal. The driver will get completion notifications from the device only + * on JOBS which are enqueued in the external queues. + * + * Gaudi2 onwards: + * There is a single type of queue for all types of engines, either DMA engines + * for transfers from/to the host or inside the device, or compute engines. + * The driver will get completion notifications from the device for all queues. + * + * For jobs on external queues, the user needs to create command buffers + * through the CB ioctl and give the CB's handle to the CS ioctl. For jobs on + * internal queues, the user needs to prepare a "command buffer" with packets + * on either the device SRAM/DRAM or the host, and give the device address of + * that buffer to the CS ioctl. + * For jobs on H/W queues both options of command buffers are valid. + * + * This IOCTL is asynchronous in regard to the actual execution of the CS. This + * means it returns immediately after ALL the JOBS were enqueued on their + * relevant queues. Therefore, the user mustn't assume the CS has been completed + * or has even started to execute. + * + * Upon successful enqueue, the IOCTL returns a sequence number which the user + * can use with the "Wait for CS" IOCTL to check whether the handle's CS + * non-internal JOBS have been completed. Note that if the CS has internal JOBS + * which can execute AFTER the external JOBS have finished, the driver might + * report that the CS has finished executing BEFORE the internal JOBS have + * actually finished executing. + * + * Even though the sequence number increments per CS, the user can NOT + * automatically assume that if CS with sequence number N finished, then CS + * with sequence number N-1 also finished. The user can make this assumption if + * and only if CS N and CS N-1 are exactly the same (same CBs for the same + * queues). + */ +#define DRM_IOCTL_HL_CS \ + DRM_IOWR(DRM_COMMAND_BASE + HL_IOCTL_CS, union hl_cs_args) + +/* + * Wait for Command Submission + * + * The user can call this IOCTL with a handle it received from the CS IOCTL + * to wait until the handle's CS has finished executing. The user will wait + * inside the kernel until the CS has finished or until the user-requested + * timeout has expired. + * + * If the timeout value is 0, the driver won't sleep at all. It will check + * the status of the CS and return immediately + * + * The return value of the IOCTL is a standard Linux error code. The possible + * values are: + * + * EINTR - Kernel waiting has been interrupted, e.g. due to OS signal + * that the user process received + * ETIMEDOUT - The CS has caused a timeout on the device + * EIO - The CS was aborted (usually because the device was reset) + * ENODEV - The device wants to do hard-reset (so user need to close FD) + * + * The driver also returns a custom define in case the IOCTL call returned 0. + * The define can be one of the following: + * + * HL_WAIT_CS_STATUS_COMPLETED - The CS has been completed successfully (0) + * HL_WAIT_CS_STATUS_BUSY - The CS is still executing (0) + * HL_WAIT_CS_STATUS_TIMEDOUT - The CS has caused a timeout on the device + * (ETIMEDOUT) + * HL_WAIT_CS_STATUS_ABORTED - The CS was aborted, usually because the + * device was reset (EIO) + */ +#define DRM_IOCTL_HL_WAIT_CS \ + DRM_IOWR(DRM_COMMAND_BASE + HL_IOCTL_WAIT_CS, union hl_wait_cs_args) + +/* + * Memory + * - Map host memory to device MMU + * - Unmap host memory from device MMU + * + * This IOCTL allows the user to map host memory to the device MMU + * + * For host memory, the IOCTL doesn't allocate memory. The user is supposed + * to allocate the memory in user-space (malloc/new). The driver pins the + * physical pages (up to the allowed limit by the OS), assigns a virtual + * address in the device VA space and initializes the device MMU. + * + * There is an option for the user to specify the requested virtual address. + * + */ +#define DRM_IOCTL_HL_MEMORY \ + DRM_IOWR(DRM_COMMAND_BASE + HL_IOCTL_MEMORY, union hl_mem_args) + +/* + * Debug + * - Enable/disable the ETR/ETF/FUNNEL/STM/BMON/SPMU debug traces + * + * This IOCTL allows the user to get debug traces from the chip. + * + * Before the user can send configuration requests of the various + * debug/profile engines, it needs to set the device into debug mode. + * This is because the debug/profile infrastructure is shared component in the + * device and we can't allow multiple users to access it at the same time. + * + * Once a user set the device into debug mode, the driver won't allow other + * users to "work" with the device, i.e. open a FD. If there are multiple users + * opened on the device, the driver won't allow any user to debug the device. + * + * For each configuration request, the user needs to provide the register index + * and essential data such as buffer address and size. + * + * Once the user has finished using the debug/profile engines, he should + * set the device into non-debug mode, i.e. disable debug mode. + * + * The driver can decide to "kick out" the user if he abuses this interface. + * + */ +#define DRM_IOCTL_HL_DEBUG \ + DRM_IOWR(DRM_COMMAND_BASE + HL_IOCTL_DEBUG, struct hl_debug_args) + +#define HL_COMMAND_START (DRM_COMMAND_BASE + HL_IOCTL_INFO) +#define HL_COMMAND_END (DRM_COMMAND_BASE + HL_IOCTL_DEBUG + 1) + +#endif /* HABANALABS_H_ */ diff --git a/include/arch/x86_64/drm/i915_drm.h b/include/arch/x86_64/drm/i915_drm.h new file mode 100644 index 00000000..68f08e16 --- /dev/null +++ b/include/arch/x86_64/drm/i915_drm.h @@ -0,0 +1,4040 @@ +/* + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef _I915_DRM_H_ +#define _I915_DRM_H_ + +#include "drm.h" + +#if defined(__cplusplus) +extern "C" { +#endif + +/* Please note that modifications to all structs defined here are + * subject to backwards-compatibility constraints. + */ + +/** + * DOC: uevents generated by i915 on its device node + * + * I915_L3_PARITY_UEVENT - Generated when the driver receives a parity mismatch + * event from the GPU L3 cache. Additional information supplied is ROW, + * BANK, SUBBANK, SLICE of the affected cacheline. Userspace should keep + * track of these events, and if a specific cache-line seems to have a + * persistent error, remap it with the L3 remapping tool supplied in + * intel-gpu-tools. The value supplied with the event is always 1. + * + * I915_ERROR_UEVENT - Generated upon error detection, currently only via + * hangcheck. The error detection event is a good indicator of when things + * began to go badly. The value supplied with the event is a 1 upon error + * detection, and a 0 upon reset completion, signifying no more error + * exists. NOTE: Disabling hangcheck or reset via module parameter will + * cause the related events to not be seen. + * + * I915_RESET_UEVENT - Event is generated just before an attempt to reset the + * GPU. The value supplied with the event is always 1. NOTE: Disable + * reset via module parameter will cause this event to not be seen. + */ +#define I915_L3_PARITY_UEVENT "L3_PARITY_ERROR" +#define I915_ERROR_UEVENT "ERROR" +#define I915_RESET_UEVENT "RESET" + +/** + * struct i915_user_extension - Base class for defining a chain of extensions + * + * Many interfaces need to grow over time. In most cases we can simply + * extend the struct and have userspace pass in more data. Another option, + * as demonstrated by Vulkan's approach to providing extensions for forward + * and backward compatibility, is to use a list of optional structs to + * provide those extra details. + * + * The key advantage to using an extension chain is that it allows us to + * redefine the interface more easily than an ever growing struct of + * increasing complexity, and for large parts of that interface to be + * entirely optional. The downside is more pointer chasing; chasing across + * the boundary with pointers encapsulated inside u64. + * + * Example chaining: + * + * .. code-block:: C + * + * struct i915_user_extension ext3 { + * .next_extension = 0, // end + * .name = ..., + * }; + * struct i915_user_extension ext2 { + * .next_extension = (uintptr_t)&ext3, + * .name = ..., + * }; + * struct i915_user_extension ext1 { + * .next_extension = (uintptr_t)&ext2, + * .name = ..., + * }; + * + * Typically the struct i915_user_extension would be embedded in some uAPI + * struct, and in this case we would feed it the head of the chain(i.e ext1), + * which would then apply all of the above extensions. + * + */ +struct i915_user_extension { + /** + * @next_extension: + * + * Pointer to the next struct i915_user_extension, or zero if the end. + */ + __u64 next_extension; + /** + * @name: Name of the extension. + * + * Note that the name here is just some integer. + * + * Also note that the name space for this is not global for the whole + * driver, but rather its scope/meaning is limited to the specific piece + * of uAPI which has embedded the struct i915_user_extension. + */ + __u32 name; + /** + * @flags: MBZ + * + * All undefined bits must be zero. + */ + __u32 flags; + /** + * @rsvd: MBZ + * + * Reserved for future use; must be zero. + */ + __u32 rsvd[4]; +}; + +/* + * MOCS indexes used for GPU surfaces, defining the cacheability of the + * surface data and the coherency for this data wrt. CPU vs. GPU accesses. + */ +enum i915_mocs_table_index { + /* + * Not cached anywhere, coherency between CPU and GPU accesses is + * guaranteed. + */ + I915_MOCS_UNCACHED, + /* + * Cacheability and coherency controlled by the kernel automatically + * based on the DRM_I915_GEM_SET_CACHING IOCTL setting and the current + * usage of the surface (used for display scanout or not). + */ + I915_MOCS_PTE, + /* + * Cached in all GPU caches available on the platform. + * Coherency between CPU and GPU accesses to the surface is not + * guaranteed without extra synchronization. + */ + I915_MOCS_CACHED, +}; + +/** + * enum drm_i915_gem_engine_class - uapi engine type enumeration + * + * Different engines serve different roles, and there may be more than one + * engine serving each role. This enum provides a classification of the role + * of the engine, which may be used when requesting operations to be performed + * on a certain subset of engines, or for providing information about that + * group. + */ +enum drm_i915_gem_engine_class { + /** + * @I915_ENGINE_CLASS_RENDER: + * + * Render engines support instructions used for 3D, Compute (GPGPU), + * and programmable media workloads. These instructions fetch data and + * dispatch individual work items to threads that operate in parallel. + * The threads run small programs (called "kernels" or "shaders") on + * the GPU's execution units (EUs). + */ + I915_ENGINE_CLASS_RENDER = 0, + + /** + * @I915_ENGINE_CLASS_COPY: + * + * Copy engines (also referred to as "blitters") support instructions + * that move blocks of data from one location in memory to another, + * or that fill a specified location of memory with fixed data. + * Copy engines can perform pre-defined logical or bitwise operations + * on the source, destination, or pattern data. + */ + I915_ENGINE_CLASS_COPY = 1, + + /** + * @I915_ENGINE_CLASS_VIDEO: + * + * Video engines (also referred to as "bit stream decode" (BSD) or + * "vdbox") support instructions that perform fixed-function media + * decode and encode. + */ + I915_ENGINE_CLASS_VIDEO = 2, + + /** + * @I915_ENGINE_CLASS_VIDEO_ENHANCE: + * + * Video enhancement engines (also referred to as "vebox") support + * instructions related to image enhancement. + */ + I915_ENGINE_CLASS_VIDEO_ENHANCE = 3, + + /** + * @I915_ENGINE_CLASS_COMPUTE: + * + * Compute engines support a subset of the instructions available + * on render engines: compute engines support Compute (GPGPU) and + * programmable media workloads, but do not support the 3D pipeline. + */ + I915_ENGINE_CLASS_COMPUTE = 4, + + /* Values in this enum should be kept compact. */ + + /** + * @I915_ENGINE_CLASS_INVALID: + * + * Placeholder value to represent an invalid engine class assignment. + */ + I915_ENGINE_CLASS_INVALID = -1 +}; + +/** + * struct i915_engine_class_instance - Engine class/instance identifier + * + * There may be more than one engine fulfilling any role within the system. + * Each engine of a class is given a unique instance number and therefore + * any engine can be specified by its class:instance tuplet. APIs that allow + * access to any engine in the system will use struct i915_engine_class_instance + * for this identification. + */ +struct i915_engine_class_instance { + /** + * @engine_class: + * + * Engine class from enum drm_i915_gem_engine_class + */ + __u16 engine_class; +#define I915_ENGINE_CLASS_INVALID_NONE -1 +#define I915_ENGINE_CLASS_INVALID_VIRTUAL -2 + + /** + * @engine_instance: + * + * Engine instance. + */ + __u16 engine_instance; +}; + +/** + * DOC: perf_events exposed by i915 through /sys/bus/event_sources/drivers/i915 + * + */ + +enum drm_i915_pmu_engine_sample { + I915_SAMPLE_BUSY = 0, + I915_SAMPLE_WAIT = 1, + I915_SAMPLE_SEMA = 2 +}; + +#define I915_PMU_SAMPLE_BITS (4) +#define I915_PMU_SAMPLE_MASK (0xf) +#define I915_PMU_SAMPLE_INSTANCE_BITS (8) +#define I915_PMU_CLASS_SHIFT \ + (I915_PMU_SAMPLE_BITS + I915_PMU_SAMPLE_INSTANCE_BITS) + +#define __I915_PMU_ENGINE(class, instance, sample) \ + ((class) << I915_PMU_CLASS_SHIFT | \ + (instance) << I915_PMU_SAMPLE_BITS | (sample)) + +#define I915_PMU_ENGINE_BUSY(class, instance) \ + __I915_PMU_ENGINE(class, instance, I915_SAMPLE_BUSY) + +#define I915_PMU_ENGINE_WAIT(class, instance) \ + __I915_PMU_ENGINE(class, instance, I915_SAMPLE_WAIT) + +#define I915_PMU_ENGINE_SEMA(class, instance) \ + __I915_PMU_ENGINE(class, instance, I915_SAMPLE_SEMA) + +/* + * Top 4 bits of every non-engine counter are GT id. + */ +#define __I915_PMU_GT_SHIFT (60) + +#define ___I915_PMU_OTHER(gt, x) \ + (((__u64)__I915_PMU_ENGINE(0xff, 0xff, 0xf) + 1 + (x)) | \ + ((__u64)(gt) << __I915_PMU_GT_SHIFT)) + +#define __I915_PMU_OTHER(x) ___I915_PMU_OTHER(0, x) + +#define I915_PMU_ACTUAL_FREQUENCY __I915_PMU_OTHER(0) +#define I915_PMU_REQUESTED_FREQUENCY __I915_PMU_OTHER(1) +#define I915_PMU_INTERRUPTS __I915_PMU_OTHER(2) +#define I915_PMU_RC6_RESIDENCY __I915_PMU_OTHER(3) +#define I915_PMU_SOFTWARE_GT_AWAKE_TIME __I915_PMU_OTHER(4) + +#define I915_PMU_LAST /* Deprecated - do not use */ I915_PMU_RC6_RESIDENCY + +#define __I915_PMU_ACTUAL_FREQUENCY(gt) ___I915_PMU_OTHER(gt, 0) +#define __I915_PMU_REQUESTED_FREQUENCY(gt) ___I915_PMU_OTHER(gt, 1) +#define __I915_PMU_INTERRUPTS(gt) ___I915_PMU_OTHER(gt, 2) +#define __I915_PMU_RC6_RESIDENCY(gt) ___I915_PMU_OTHER(gt, 3) +#define __I915_PMU_SOFTWARE_GT_AWAKE_TIME(gt) ___I915_PMU_OTHER(gt, 4) + +/* Each region is a minimum of 16k, and there are at most 255 of them. + */ +#define I915_NR_TEX_REGIONS \ + 255 /* table size 2k - maximum due to use \ + * of chars for next/prev indices */ +#define I915_LOG_MIN_TEX_REGION_SIZE 14 + +typedef struct _drm_i915_init { + enum { + I915_INIT_DMA = 0x01, + I915_CLEANUP_DMA = 0x02, + I915_RESUME_DMA = 0x03 + } func; + unsigned int mmio_offset; + int sarea_priv_offset; + unsigned int ring_start; + unsigned int ring_end; + unsigned int ring_size; + unsigned int front_offset; + unsigned int back_offset; + unsigned int depth_offset; + unsigned int w; + unsigned int h; + unsigned int pitch; + unsigned int pitch_bits; + unsigned int back_pitch; + unsigned int depth_pitch; + unsigned int cpp; + unsigned int chipset; +} drm_i915_init_t; + +typedef struct _drm_i915_sarea { + struct drm_tex_region texList[I915_NR_TEX_REGIONS + 1]; + int last_upload; /* last time texture was uploaded */ + int last_enqueue; /* last time a buffer was enqueued */ + int last_dispatch; /* age of the most recently dispatched buffer */ + int ctxOwner; /* last context to upload state */ + int texAge; + int pf_enabled; /* is pageflipping allowed? */ + int pf_active; + int pf_current_page; /* which buffer is being displayed? */ + int perf_boxes; /* performance boxes to be displayed */ + int width, height; /* screen size in pixels */ + + drm_handle_t front_handle; + int front_offset; + int front_size; + + drm_handle_t back_handle; + int back_offset; + int back_size; + + drm_handle_t depth_handle; + int depth_offset; + int depth_size; + + drm_handle_t tex_handle; + int tex_offset; + int tex_size; + int log_tex_granularity; + int pitch; + int rotation; /* 0, 90, 180 or 270 */ + int rotated_offset; + int rotated_size; + int rotated_pitch; + int virtualX, virtualY; + + unsigned int front_tiled; + unsigned int back_tiled; + unsigned int depth_tiled; + unsigned int rotated_tiled; + unsigned int rotated2_tiled; + + int pipeA_x; + int pipeA_y; + int pipeA_w; + int pipeA_h; + int pipeB_x; + int pipeB_y; + int pipeB_w; + int pipeB_h; + + /* fill out some space for old userspace triple buffer */ + drm_handle_t unused_handle; + __u32 unused1, unused2, unused3; + + /* buffer object handles for static buffers. May change + * over the lifetime of the client. + */ + __u32 front_bo_handle; + __u32 back_bo_handle; + __u32 unused_bo_handle; + __u32 depth_bo_handle; + +} drm_i915_sarea_t; + +/* due to userspace building against these headers we need some compat here */ +#define planeA_x pipeA_x +#define planeA_y pipeA_y +#define planeA_w pipeA_w +#define planeA_h pipeA_h +#define planeB_x pipeB_x +#define planeB_y pipeB_y +#define planeB_w pipeB_w +#define planeB_h pipeB_h + +/* Flags for perf_boxes + */ +#define I915_BOX_RING_EMPTY 0x1 +#define I915_BOX_FLIP 0x2 +#define I915_BOX_WAIT 0x4 +#define I915_BOX_TEXTURE_LOAD 0x8 +#define I915_BOX_LOST_CONTEXT 0x10 + +/* + * i915 specific ioctls. + * + * The device specific ioctl range is [DRM_COMMAND_BASE, DRM_COMMAND_END) ie + * [0x40, 0xa0) (a0 is excluded). The numbers below are defined as offset + * against DRM_COMMAND_BASE and should be between [0x0, 0x60). + */ +#define DRM_I915_INIT 0x00 +#define DRM_I915_FLUSH 0x01 +#define DRM_I915_FLIP 0x02 +#define DRM_I915_BATCHBUFFER 0x03 +#define DRM_I915_IRQ_EMIT 0x04 +#define DRM_I915_IRQ_WAIT 0x05 +#define DRM_I915_GETPARAM 0x06 +#define DRM_I915_SETPARAM 0x07 +#define DRM_I915_ALLOC 0x08 +#define DRM_I915_FREE 0x09 +#define DRM_I915_INIT_HEAP 0x0a +#define DRM_I915_CMDBUFFER 0x0b +#define DRM_I915_DESTROY_HEAP 0x0c +#define DRM_I915_SET_VBLANK_PIPE 0x0d +#define DRM_I915_GET_VBLANK_PIPE 0x0e +#define DRM_I915_VBLANK_SWAP 0x0f +#define DRM_I915_HWS_ADDR 0x11 +#define DRM_I915_GEM_INIT 0x13 +#define DRM_I915_GEM_EXECBUFFER 0x14 +#define DRM_I915_GEM_PIN 0x15 +#define DRM_I915_GEM_UNPIN 0x16 +#define DRM_I915_GEM_BUSY 0x17 +#define DRM_I915_GEM_THROTTLE 0x18 +#define DRM_I915_GEM_ENTERVT 0x19 +#define DRM_I915_GEM_LEAVEVT 0x1a +#define DRM_I915_GEM_CREATE 0x1b +#define DRM_I915_GEM_PREAD 0x1c +#define DRM_I915_GEM_PWRITE 0x1d +#define DRM_I915_GEM_MMAP 0x1e +#define DRM_I915_GEM_SET_DOMAIN 0x1f +#define DRM_I915_GEM_SW_FINISH 0x20 +#define DRM_I915_GEM_SET_TILING 0x21 +#define DRM_I915_GEM_GET_TILING 0x22 +#define DRM_I915_GEM_GET_APERTURE 0x23 +#define DRM_I915_GEM_MMAP_GTT 0x24 +#define DRM_I915_GET_PIPE_FROM_CRTC_ID 0x25 +#define DRM_I915_GEM_MADVISE 0x26 +#define DRM_I915_OVERLAY_PUT_IMAGE 0x27 +#define DRM_I915_OVERLAY_ATTRS 0x28 +#define DRM_I915_GEM_EXECBUFFER2 0x29 +#define DRM_I915_GEM_EXECBUFFER2_WR DRM_I915_GEM_EXECBUFFER2 +#define DRM_I915_GET_SPRITE_COLORKEY 0x2a +#define DRM_I915_SET_SPRITE_COLORKEY 0x2b +#define DRM_I915_GEM_WAIT 0x2c +#define DRM_I915_GEM_CONTEXT_CREATE 0x2d +#define DRM_I915_GEM_CONTEXT_DESTROY 0x2e +#define DRM_I915_GEM_SET_CACHING 0x2f +#define DRM_I915_GEM_GET_CACHING 0x30 +#define DRM_I915_REG_READ 0x31 +#define DRM_I915_GET_RESET_STATS 0x32 +#define DRM_I915_GEM_USERPTR 0x33 +#define DRM_I915_GEM_CONTEXT_GETPARAM 0x34 +#define DRM_I915_GEM_CONTEXT_SETPARAM 0x35 +#define DRM_I915_PERF_OPEN 0x36 +#define DRM_I915_PERF_ADD_CONFIG 0x37 +#define DRM_I915_PERF_REMOVE_CONFIG 0x38 +#define DRM_I915_QUERY 0x39 +#define DRM_I915_GEM_VM_CREATE 0x3a +#define DRM_I915_GEM_VM_DESTROY 0x3b +#define DRM_I915_GEM_CREATE_EXT 0x3c +/* Must be kept compact -- no holes */ + +#define DRM_IOCTL_I915_INIT \ + DRM_IOW(DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t) +#define DRM_IOCTL_I915_FLUSH DRM_IO(DRM_COMMAND_BASE + DRM_I915_FLUSH) +#define DRM_IOCTL_I915_FLIP DRM_IO(DRM_COMMAND_BASE + DRM_I915_FLIP) +#define DRM_IOCTL_I915_BATCHBUFFER \ + DRM_IOW(DRM_COMMAND_BASE + DRM_I915_BATCHBUFFER, drm_i915_batchbuffer_t) +#define DRM_IOCTL_I915_IRQ_EMIT \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_IRQ_EMIT, drm_i915_irq_emit_t) +#define DRM_IOCTL_I915_IRQ_WAIT \ + DRM_IOW(DRM_COMMAND_BASE + DRM_I915_IRQ_WAIT, drm_i915_irq_wait_t) +#define DRM_IOCTL_I915_GETPARAM \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GETPARAM, drm_i915_getparam_t) +#define DRM_IOCTL_I915_SETPARAM \ + DRM_IOW(DRM_COMMAND_BASE + DRM_I915_SETPARAM, drm_i915_setparam_t) +#define DRM_IOCTL_I915_ALLOC \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_ALLOC, drm_i915_mem_alloc_t) +#define DRM_IOCTL_I915_FREE \ + DRM_IOW(DRM_COMMAND_BASE + DRM_I915_FREE, drm_i915_mem_free_t) +#define DRM_IOCTL_I915_INIT_HEAP \ + DRM_IOW(DRM_COMMAND_BASE + DRM_I915_INIT_HEAP, drm_i915_mem_init_heap_t) +#define DRM_IOCTL_I915_CMDBUFFER \ + DRM_IOW(DRM_COMMAND_BASE + DRM_I915_CMDBUFFER, drm_i915_cmdbuffer_t) +#define DRM_IOCTL_I915_DESTROY_HEAP \ + DRM_IOW(DRM_COMMAND_BASE + DRM_I915_DESTROY_HEAP, \ + drm_i915_mem_destroy_heap_t) +#define DRM_IOCTL_I915_SET_VBLANK_PIPE \ + DRM_IOW(DRM_COMMAND_BASE + DRM_I915_SET_VBLANK_PIPE, \ + drm_i915_vblank_pipe_t) +#define DRM_IOCTL_I915_GET_VBLANK_PIPE \ + DRM_IOR(DRM_COMMAND_BASE + DRM_I915_GET_VBLANK_PIPE, \ + drm_i915_vblank_pipe_t) +#define DRM_IOCTL_I915_VBLANK_SWAP \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_VBLANK_SWAP, \ + drm_i915_vblank_swap_t) +#define DRM_IOCTL_I915_HWS_ADDR \ + DRM_IOW(DRM_COMMAND_BASE + DRM_I915_HWS_ADDR, struct drm_i915_gem_init) +#define DRM_IOCTL_I915_GEM_INIT \ + DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_INIT, struct drm_i915_gem_init) +#define DRM_IOCTL_I915_GEM_EXECBUFFER \ + DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER, \ + struct drm_i915_gem_execbuffer) +#define DRM_IOCTL_I915_GEM_EXECBUFFER2 \ + DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2, \ + struct drm_i915_gem_execbuffer2) +#define DRM_IOCTL_I915_GEM_EXECBUFFER2_WR \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2_WR, \ + struct drm_i915_gem_execbuffer2) +#define DRM_IOCTL_I915_GEM_PIN \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_PIN, struct drm_i915_gem_pin) +#define DRM_IOCTL_I915_GEM_UNPIN \ + DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_UNPIN, \ + struct drm_i915_gem_unpin) +#define DRM_IOCTL_I915_GEM_BUSY \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_BUSY, struct drm_i915_gem_busy) +#define DRM_IOCTL_I915_GEM_SET_CACHING \ + DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_SET_CACHING, \ + struct drm_i915_gem_caching) +#define DRM_IOCTL_I915_GEM_GET_CACHING \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_GET_CACHING, \ + struct drm_i915_gem_caching) +#define DRM_IOCTL_I915_GEM_THROTTLE \ + DRM_IO(DRM_COMMAND_BASE + DRM_I915_GEM_THROTTLE) +#define DRM_IOCTL_I915_GEM_ENTERVT \ + DRM_IO(DRM_COMMAND_BASE + DRM_I915_GEM_ENTERVT) +#define DRM_IOCTL_I915_GEM_LEAVEVT \ + DRM_IO(DRM_COMMAND_BASE + DRM_I915_GEM_LEAVEVT) +#define DRM_IOCTL_I915_GEM_CREATE \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_CREATE, \ + struct drm_i915_gem_create) +#define DRM_IOCTL_I915_GEM_CREATE_EXT \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_CREATE_EXT, \ + struct drm_i915_gem_create_ext) +#define DRM_IOCTL_I915_GEM_PREAD \ + DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_PREAD, \ + struct drm_i915_gem_pread) +#define DRM_IOCTL_I915_GEM_PWRITE \ + DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_PWRITE, \ + struct drm_i915_gem_pwrite) +#define DRM_IOCTL_I915_GEM_MMAP \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MMAP, struct drm_i915_gem_mmap) +#define DRM_IOCTL_I915_GEM_MMAP_GTT \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MMAP_GTT, \ + struct drm_i915_gem_mmap_gtt) +#define DRM_IOCTL_I915_GEM_MMAP_OFFSET \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MMAP_GTT, \ + struct drm_i915_gem_mmap_offset) +#define DRM_IOCTL_I915_GEM_SET_DOMAIN \ + DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_SET_DOMAIN, \ + struct drm_i915_gem_set_domain) +#define DRM_IOCTL_I915_GEM_SW_FINISH \ + DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_SW_FINISH, \ + struct drm_i915_gem_sw_finish) +#define DRM_IOCTL_I915_GEM_SET_TILING \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_SET_TILING, \ + struct drm_i915_gem_set_tiling) +#define DRM_IOCTL_I915_GEM_GET_TILING \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_GET_TILING, \ + struct drm_i915_gem_get_tiling) +#define DRM_IOCTL_I915_GEM_GET_APERTURE \ + DRM_IOR(DRM_COMMAND_BASE + DRM_I915_GEM_GET_APERTURE, \ + struct drm_i915_gem_get_aperture) +#define DRM_IOCTL_I915_GET_PIPE_FROM_CRTC_ID \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GET_PIPE_FROM_CRTC_ID, \ + struct drm_i915_get_pipe_from_crtc_id) +#define DRM_IOCTL_I915_GEM_MADVISE \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MADVISE, \ + struct drm_i915_gem_madvise) +#define DRM_IOCTL_I915_OVERLAY_PUT_IMAGE \ + DRM_IOW(DRM_COMMAND_BASE + DRM_I915_OVERLAY_PUT_IMAGE, \ + struct drm_intel_overlay_put_image) +#define DRM_IOCTL_I915_OVERLAY_ATTRS \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_OVERLAY_ATTRS, \ + struct drm_intel_overlay_attrs) +#define DRM_IOCTL_I915_SET_SPRITE_COLORKEY \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_SET_SPRITE_COLORKEY, \ + struct drm_intel_sprite_colorkey) +#define DRM_IOCTL_I915_GET_SPRITE_COLORKEY \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GET_SPRITE_COLORKEY, \ + struct drm_intel_sprite_colorkey) +#define DRM_IOCTL_I915_GEM_WAIT \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_WAIT, struct drm_i915_gem_wait) +#define DRM_IOCTL_I915_GEM_CONTEXT_CREATE \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_CREATE, \ + struct drm_i915_gem_context_create) +#define DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_CREATE, \ + struct drm_i915_gem_context_create_ext) +#define DRM_IOCTL_I915_GEM_CONTEXT_DESTROY \ + DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_DESTROY, \ + struct drm_i915_gem_context_destroy) +#define DRM_IOCTL_I915_REG_READ \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_REG_READ, struct drm_i915_reg_read) +#define DRM_IOCTL_I915_GET_RESET_STATS \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GET_RESET_STATS, \ + struct drm_i915_reset_stats) +#define DRM_IOCTL_I915_GEM_USERPTR \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_USERPTR, \ + struct drm_i915_gem_userptr) +#define DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_GETPARAM, \ + struct drm_i915_gem_context_param) +#define DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_SETPARAM, \ + struct drm_i915_gem_context_param) +#define DRM_IOCTL_I915_PERF_OPEN \ + DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_OPEN, \ + struct drm_i915_perf_open_param) +#define DRM_IOCTL_I915_PERF_ADD_CONFIG \ + DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_ADD_CONFIG, \ + struct drm_i915_perf_oa_config) +#define DRM_IOCTL_I915_PERF_REMOVE_CONFIG \ + DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_REMOVE_CONFIG, __u64) +#define DRM_IOCTL_I915_QUERY \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_QUERY, struct drm_i915_query) +#define DRM_IOCTL_I915_GEM_VM_CREATE \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_VM_CREATE, \ + struct drm_i915_gem_vm_control) +#define DRM_IOCTL_I915_GEM_VM_DESTROY \ + DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_VM_DESTROY, \ + struct drm_i915_gem_vm_control) + +/* Allow drivers to submit batchbuffers directly to hardware, relying + * on the security mechanisms provided by hardware. + */ +typedef struct drm_i915_batchbuffer { + int start; /* agp offset */ + int used; /* nr bytes in use */ + int DR1; /* hw flags for GFX_OP_DRAWRECT_INFO */ + int DR4; /* window origin for GFX_OP_DRAWRECT_INFO */ + int num_cliprects; /* mulitpass with multiple cliprects? */ + struct drm_clip_rect *cliprects; /* pointer to userspace cliprects */ +} drm_i915_batchbuffer_t; + +/* As above, but pass a pointer to userspace buffer which can be + * validated by the kernel prior to sending to hardware. + */ +typedef struct _drm_i915_cmdbuffer { + char *buf; /* pointer to userspace command buffer */ + int sz; /* nr bytes in buf */ + int DR1; /* hw flags for GFX_OP_DRAWRECT_INFO */ + int DR4; /* window origin for GFX_OP_DRAWRECT_INFO */ + int num_cliprects; /* mulitpass with multiple cliprects? */ + struct drm_clip_rect *cliprects; /* pointer to userspace cliprects */ +} drm_i915_cmdbuffer_t; + +/* Userspace can request & wait on irq's: + */ +typedef struct drm_i915_irq_emit { + int *irq_seq; +} drm_i915_irq_emit_t; + +typedef struct drm_i915_irq_wait { + int irq_seq; +} drm_i915_irq_wait_t; + +/* + * Different modes of per-process Graphics Translation Table, + * see I915_PARAM_HAS_ALIASING_PPGTT + */ +#define I915_GEM_PPGTT_NONE 0 +#define I915_GEM_PPGTT_ALIASING 1 +#define I915_GEM_PPGTT_FULL 2 + +/* Ioctl to query kernel params: + */ +#define I915_PARAM_IRQ_ACTIVE 1 +#define I915_PARAM_ALLOW_BATCHBUFFER 2 +#define I915_PARAM_LAST_DISPATCH 3 +#define I915_PARAM_CHIPSET_ID 4 +#define I915_PARAM_HAS_GEM 5 +#define I915_PARAM_NUM_FENCES_AVAIL 6 +#define I915_PARAM_HAS_OVERLAY 7 +#define I915_PARAM_HAS_PAGEFLIPPING 8 +#define I915_PARAM_HAS_EXECBUF2 9 +#define I915_PARAM_HAS_BSD 10 +#define I915_PARAM_HAS_BLT 11 +#define I915_PARAM_HAS_RELAXED_FENCING 12 +#define I915_PARAM_HAS_COHERENT_RINGS 13 +#define I915_PARAM_HAS_EXEC_CONSTANTS 14 +#define I915_PARAM_HAS_RELAXED_DELTA 15 +#define I915_PARAM_HAS_GEN7_SOL_RESET 16 +#define I915_PARAM_HAS_LLC 17 +#define I915_PARAM_HAS_ALIASING_PPGTT 18 +#define I915_PARAM_HAS_WAIT_TIMEOUT 19 +#define I915_PARAM_HAS_SEMAPHORES 20 +#define I915_PARAM_HAS_PRIME_VMAP_FLUSH 21 +#define I915_PARAM_HAS_VEBOX 22 +#define I915_PARAM_HAS_SECURE_BATCHES 23 +#define I915_PARAM_HAS_PINNED_BATCHES 24 +#define I915_PARAM_HAS_EXEC_NO_RELOC 25 +#define I915_PARAM_HAS_EXEC_HANDLE_LUT 26 +#define I915_PARAM_HAS_WT 27 +#define I915_PARAM_CMD_PARSER_VERSION 28 +#define I915_PARAM_HAS_COHERENT_PHYS_GTT 29 +#define I915_PARAM_MMAP_VERSION 30 +#define I915_PARAM_HAS_BSD2 31 +#define I915_PARAM_REVISION 32 +#define I915_PARAM_SUBSLICE_TOTAL 33 +#define I915_PARAM_EU_TOTAL 34 +#define I915_PARAM_HAS_GPU_RESET 35 +#define I915_PARAM_HAS_RESOURCE_STREAMER 36 +#define I915_PARAM_HAS_EXEC_SOFTPIN 37 +#define I915_PARAM_HAS_POOLED_EU 38 +#define I915_PARAM_MIN_EU_IN_POOL 39 +#define I915_PARAM_MMAP_GTT_VERSION 40 + +/* + * Query whether DRM_I915_GEM_EXECBUFFER2 supports user defined execution + * priorities and the driver will attempt to execute batches in priority order. + * The param returns a capability bitmask, nonzero implies that the scheduler + * is enabled, with different features present according to the mask. + * + * The initial priority for each batch is supplied by the context and is + * controlled via I915_CONTEXT_PARAM_PRIORITY. + */ +#define I915_PARAM_HAS_SCHEDULER 41 +#define I915_SCHEDULER_CAP_ENABLED (1ul << 0) +#define I915_SCHEDULER_CAP_PRIORITY (1ul << 1) +#define I915_SCHEDULER_CAP_PREEMPTION (1ul << 2) +#define I915_SCHEDULER_CAP_SEMAPHORES (1ul << 3) +#define I915_SCHEDULER_CAP_ENGINE_BUSY_STATS (1ul << 4) +/* + * Indicates the 2k user priority levels are statically mapped into 3 buckets as + * follows: + * + * -1k to -1 Low priority + * 0 Normal priority + * 1 to 1k Highest priority + */ +#define I915_SCHEDULER_CAP_STATIC_PRIORITY_MAP (1ul << 5) + +/* + * Query the status of HuC load. + * + * The query can fail in the following scenarios with the listed error codes: + * -ENODEV if HuC is not present on this platform, + * -EOPNOTSUPP if HuC firmware usage is disabled, + * -ENOPKG if HuC firmware fetch failed, + * -ENOEXEC if HuC firmware is invalid or mismatched, + * -ENOMEM if i915 failed to prepare the FW objects for transfer to the uC, + * -EIO if the FW transfer or the FW authentication failed. + * + * If the IOCTL is successful, the returned parameter will be set to one of the + * following values: + * * 0 if HuC firmware load is not complete, + * * 1 if HuC firmware is loaded and fully authenticated, + * * 2 if HuC firmware is loaded and authenticated for clear media only + */ +#define I915_PARAM_HUC_STATUS 42 + +/* Query whether DRM_I915_GEM_EXECBUFFER2 supports the ability to opt-out of + * synchronisation with implicit fencing on individual objects. + * See EXEC_OBJECT_ASYNC. + */ +#define I915_PARAM_HAS_EXEC_ASYNC 43 + +/* Query whether DRM_I915_GEM_EXECBUFFER2 supports explicit fence support - + * both being able to pass in a sync_file fd to wait upon before executing, + * and being able to return a new sync_file fd that is signaled when the + * current request is complete. See I915_EXEC_FENCE_IN and I915_EXEC_FENCE_OUT. + */ +#define I915_PARAM_HAS_EXEC_FENCE 44 + +/* Query whether DRM_I915_GEM_EXECBUFFER2 supports the ability to capture + * user-specified buffers for post-mortem debugging of GPU hangs. See + * EXEC_OBJECT_CAPTURE. + */ +#define I915_PARAM_HAS_EXEC_CAPTURE 45 + +#define I915_PARAM_SLICE_MASK 46 + +/* Assuming it's uniform for each slice, this queries the mask of subslices + * per-slice for this system. + */ +#define I915_PARAM_SUBSLICE_MASK 47 + +/* + * Query whether DRM_I915_GEM_EXECBUFFER2 supports supplying the batch buffer + * as the first execobject as opposed to the last. See I915_EXEC_BATCH_FIRST. + */ +#define I915_PARAM_HAS_EXEC_BATCH_FIRST 48 + +/* Query whether DRM_I915_GEM_EXECBUFFER2 supports supplying an array of + * drm_i915_gem_exec_fence structures. See I915_EXEC_FENCE_ARRAY. + */ +#define I915_PARAM_HAS_EXEC_FENCE_ARRAY 49 + +/* + * Query whether every context (both per-file default and user created) is + * isolated (insofar as HW supports). If this parameter is not true, then + * freshly created contexts may inherit values from an existing context, + * rather than default HW values. If true, it also ensures (insofar as HW + * supports) that all state set by this context will not leak to any other + * context. + * + * As not every engine across every gen support contexts, the returned + * value reports the support of context isolation for individual engines by + * returning a bitmask of each engine class set to true if that class supports + * isolation. + */ +#define I915_PARAM_HAS_CONTEXT_ISOLATION 50 + +/* Frequency of the command streamer timestamps given by the *_TIMESTAMP + * registers. This used to be fixed per platform but from CNL onwards, this + * might vary depending on the parts. + */ +#define I915_PARAM_CS_TIMESTAMP_FREQUENCY 51 + +/* + * Once upon a time we supposed that writes through the GGTT would be + * immediately in physical memory (once flushed out of the CPU path). However, + * on a few different processors and chipsets, this is not necessarily the case + * as the writes appear to be buffered internally. Thus a read of the backing + * storage (physical memory) via a different path (with different physical tags + * to the indirect write via the GGTT) will see stale values from before + * the GGTT write. Inside the kernel, we can for the most part keep track of + * the different read/write domains in use (e.g. set-domain), but the assumption + * of coherency is baked into the ABI, hence reporting its true state in this + * parameter. + * + * Reports true when writes via mmap_gtt are immediately visible following an + * lfence to flush the WCB. + * + * Reports false when writes via mmap_gtt are indeterminately delayed in an in + * internal buffer and are _not_ immediately visible to third parties accessing + * directly via mmap_cpu/mmap_wc. Use of mmap_gtt as part of an IPC + * communications channel when reporting false is strongly disadvised. + */ +#define I915_PARAM_MMAP_GTT_COHERENT 52 + +/* + * Query whether DRM_I915_GEM_EXECBUFFER2 supports coordination of parallel + * execution through use of explicit fence support. + * See I915_EXEC_FENCE_OUT and I915_EXEC_FENCE_SUBMIT. + */ +#define I915_PARAM_HAS_EXEC_SUBMIT_FENCE 53 + +/* + * Revision of the i915-perf uAPI. The value returned helps determine what + * i915-perf features are available. See drm_i915_perf_property_id. + */ +#define I915_PARAM_PERF_REVISION 54 + +/* Query whether DRM_I915_GEM_EXECBUFFER2 supports supplying an array of + * timeline syncobj through drm_i915_gem_execbuffer_ext_timeline_fences. See + * I915_EXEC_USE_EXTENSIONS. + */ +#define I915_PARAM_HAS_EXEC_TIMELINE_FENCES 55 + +/* Query if the kernel supports the I915_USERPTR_PROBE flag. */ +#define I915_PARAM_HAS_USERPTR_PROBE 56 + +/* + * Frequency of the timestamps in OA reports. This used to be the same as the CS + * timestamp frequency, but differs on some platforms. + */ +#define I915_PARAM_OA_TIMESTAMP_FREQUENCY 57 + +/* + * Query the status of PXP support in i915. + * + * The query can fail in the following scenarios with the listed error codes: + * -ENODEV = PXP support is not available on the GPU device or in the + * kernel due to missing component drivers or kernel configs. + * + * If the IOCTL is successful, the returned parameter will be set to one of + * the following values: + * 1 = PXP feature is supported and is ready for use. + * 2 = PXP feature is supported but should be ready soon (pending + * initialization of non-i915 system dependencies). + * + * NOTE: When param is supported (positive return values), user space should + * still refer to the GEM PXP context-creation UAPI header specs to be + * aware of possible failure due to system state machine at the time. + */ +#define I915_PARAM_PXP_STATUS 58 + +/* + * Query if kernel allows marking a context to send a Freq hint to SLPC. This + * will enable use of the strategies allowed by the SLPC algorithm. + */ +#define I915_PARAM_HAS_CONTEXT_FREQ_HINT 59 + +/* Must be kept compact -- no holes and well documented */ + +/** + * struct drm_i915_getparam - Driver parameter query structure. + */ +struct drm_i915_getparam { + /** @param: Driver parameter to query. */ + __s32 param; + + /** + * @value: Address of memory where queried value should be put. + * + * WARNING: Using pointers instead of fixed-size u64 means we need to + * write compat32 code. Don't repeat this mistake. + */ + int *value; +}; + +/** + * typedef drm_i915_getparam_t - Driver parameter query structure. + * See struct drm_i915_getparam. + */ +typedef struct drm_i915_getparam drm_i915_getparam_t; + +/* Ioctl to set kernel params: + */ +#define I915_SETPARAM_USE_MI_BATCHBUFFER_START 1 +#define I915_SETPARAM_TEX_LRU_LOG_GRANULARITY 2 +#define I915_SETPARAM_ALLOW_BATCHBUFFER 3 +#define I915_SETPARAM_NUM_USED_FENCES 4 +/* Must be kept compact -- no holes */ + +typedef struct drm_i915_setparam { + int param; + int value; +} drm_i915_setparam_t; + +/* A memory manager for regions of shared memory: + */ +#define I915_MEM_REGION_AGP 1 + +typedef struct drm_i915_mem_alloc { + int region; + int alignment; + int size; + int *region_offset; /* offset from start of fb or agp */ +} drm_i915_mem_alloc_t; + +typedef struct drm_i915_mem_free { + int region; + int region_offset; +} drm_i915_mem_free_t; + +typedef struct drm_i915_mem_init_heap { + int region; + int size; + int start; +} drm_i915_mem_init_heap_t; + +/* Allow memory manager to be torn down and re-initialized (eg on + * rotate): + */ +typedef struct drm_i915_mem_destroy_heap { + int region; +} drm_i915_mem_destroy_heap_t; + +/* Allow X server to configure which pipes to monitor for vblank signals + */ +#define DRM_I915_VBLANK_PIPE_A 1 +#define DRM_I915_VBLANK_PIPE_B 2 + +typedef struct drm_i915_vblank_pipe { + int pipe; +} drm_i915_vblank_pipe_t; + +/* Schedule buffer swap at given vertical blank: + */ +typedef struct drm_i915_vblank_swap { + drm_drawable_t drawable; + enum drm_vblank_seq_type seqtype; + unsigned int sequence; +} drm_i915_vblank_swap_t; + +typedef struct drm_i915_hws_addr { + __u64 addr; +} drm_i915_hws_addr_t; + +struct drm_i915_gem_init { + /** + * Beginning offset in the GTT to be managed by the DRM memory + * manager. + */ + __u64 gtt_start; + /** + * Ending offset in the GTT to be managed by the DRM memory + * manager. + */ + __u64 gtt_end; +}; + +struct drm_i915_gem_create { + /** + * Requested size for the object. + * + * The (page-aligned) allocated size for the object will be returned. + */ + __u64 size; + /** + * Returned handle for the object. + * + * Object handles are nonzero. + */ + __u32 handle; + __u32 pad; +}; + +struct drm_i915_gem_pread { + /** Handle for the object being read. */ + __u32 handle; + __u32 pad; + /** Offset into the object to read from */ + __u64 offset; + /** Length of data to read */ + __u64 size; + /** + * Pointer to write the data into. + * + * This is a fixed-size type for 32/64 compatibility. + */ + __u64 data_ptr; +}; + +struct drm_i915_gem_pwrite { + /** Handle for the object being written to. */ + __u32 handle; + __u32 pad; + /** Offset into the object to write to */ + __u64 offset; + /** Length of data to write */ + __u64 size; + /** + * Pointer to read the data from. + * + * This is a fixed-size type for 32/64 compatibility. + */ + __u64 data_ptr; +}; + +struct drm_i915_gem_mmap { + /** Handle for the object being mapped. */ + __u32 handle; + __u32 pad; + /** Offset in the object to map. */ + __u64 offset; + /** + * Length of data to map. + * + * The value will be page-aligned. + */ + __u64 size; + /** + * Returned pointer the data was mapped at. + * + * This is a fixed-size type for 32/64 compatibility. + */ + __u64 addr_ptr; + + /** + * Flags for extended behaviour. + * + * Added in version 2. + */ + __u64 flags; +#define I915_MMAP_WC 0x1 +}; + +struct drm_i915_gem_mmap_gtt { + /** Handle for the object being mapped. */ + __u32 handle; + __u32 pad; + /** + * Fake offset to use for subsequent mmap call + * + * This is a fixed-size type for 32/64 compatibility. + */ + __u64 offset; +}; + +/** + * struct drm_i915_gem_mmap_offset - Retrieve an offset so we can mmap this + * buffer object. + * + * This struct is passed as argument to the `DRM_IOCTL_I915_GEM_MMAP_OFFSET` + * ioctl, and is used to retrieve the fake offset to mmap an object specified by + * &handle. + * + * The legacy way of using `DRM_IOCTL_I915_GEM_MMAP` is removed on gen12+. + * `DRM_IOCTL_I915_GEM_MMAP_GTT` is an older supported alias to this struct, but + * will behave as setting the &extensions to 0, and &flags to + * `I915_MMAP_OFFSET_GTT`. + */ +struct drm_i915_gem_mmap_offset { + /** @handle: Handle for the object being mapped. */ + __u32 handle; + /** @pad: Must be zero */ + __u32 pad; + /** + * @offset: The fake offset to use for subsequent mmap call + * + * This is a fixed-size type for 32/64 compatibility. + */ + __u64 offset; + + /** + * @flags: Flags for extended behaviour. + * + * It is mandatory that one of the `MMAP_OFFSET` types + * should be included: + * + * - `I915_MMAP_OFFSET_GTT`: Use mmap with the object bound to GTT. + * (Write-Combined) + * - `I915_MMAP_OFFSET_WC`: Use Write-Combined caching. + * - `I915_MMAP_OFFSET_WB`: Use Write-Back caching. + * - `I915_MMAP_OFFSET_FIXED`: Use object placement to determine + * caching. + * + * On devices with local memory `I915_MMAP_OFFSET_FIXED` is the only + * valid type. On devices without local memory, this caching mode is + * invalid. + * + * As caching mode when specifying `I915_MMAP_OFFSET_FIXED`, WC or WB + * will be used, depending on the object placement on creation. WB will + * be used when the object can only exist in system memory, WC + * otherwise. + */ + __u64 flags; + +#define I915_MMAP_OFFSET_GTT 0 +#define I915_MMAP_OFFSET_WC 1 +#define I915_MMAP_OFFSET_WB 2 +#define I915_MMAP_OFFSET_UC 3 +#define I915_MMAP_OFFSET_FIXED 4 + + /** + * @extensions: Zero-terminated chain of extensions. + * + * No current extensions defined; mbz. + */ + __u64 extensions; +}; + +/** + * struct drm_i915_gem_set_domain - Adjust the objects write or read domain, in + * preparation for accessing the pages via some CPU domain. + * + * Specifying a new write or read domain will flush the object out of the + * previous domain(if required), before then updating the objects domain + * tracking with the new domain. + * + * Note this might involve waiting for the object first if it is still active on + * the GPU. + * + * Supported values for @read_domains and @write_domain: + * + * - I915_GEM_DOMAIN_WC: Uncached write-combined domain + * - I915_GEM_DOMAIN_CPU: CPU cache domain + * - I915_GEM_DOMAIN_GTT: Mappable aperture domain + * + * All other domains are rejected. + * + * Note that for discrete, starting from DG1, this is no longer supported, and + * is instead rejected. On such platforms the CPU domain is effectively static, + * where we also only support a single &drm_i915_gem_mmap_offset cache mode, + * which can't be set explicitly and instead depends on the object placements, + * as per the below. + * + * Implicit caching rules, starting from DG1: + * + * - If any of the object placements (see + * &drm_i915_gem_create_ext_memory_regions) contain I915_MEMORY_CLASS_DEVICE + * then the object will be allocated and mapped as write-combined only. + * + * - Everything else is always allocated and mapped as write-back, with the + * guarantee that everything is also coherent with the GPU. + * + * Note that this is likely to change in the future again, where we might need + * more flexibility on future devices, so making this all explicit as part of a + * new &drm_i915_gem_create_ext extension is probable. + */ +struct drm_i915_gem_set_domain { + /** @handle: Handle for the object. */ + __u32 handle; + + /** @read_domains: New read domains. */ + __u32 read_domains; + + /** + * @write_domain: New write domain. + * + * Note that having something in the write domain implies it's in the + * read domain, and only that read domain. + */ + __u32 write_domain; +}; + +struct drm_i915_gem_sw_finish { + /** Handle for the object */ + __u32 handle; +}; + +struct drm_i915_gem_relocation_entry { + /** + * Handle of the buffer being pointed to by this relocation entry. + * + * It's appealing to make this be an index into the mm_validate_entry + * list to refer to the buffer, but this allows the driver to create + * a relocation list for state buffers and not re-write it per + * exec using the buffer. + */ + __u32 target_handle; + + /** + * Value to be added to the offset of the target buffer to make up + * the relocation entry. + */ + __u32 delta; + + /** Offset in the buffer the relocation entry will be written into */ + __u64 offset; + + /** + * Offset value of the target buffer that the relocation entry was last + * written as. + * + * If the buffer has the same offset as last time, we can skip syncing + * and writing the relocation. This value is written back out by + * the execbuffer ioctl when the relocation is written. + */ + __u64 presumed_offset; + + /** + * Target memory domains read by this operation. + */ + __u32 read_domains; + + /** + * Target memory domains written by this operation. + * + * Note that only one domain may be written by the whole + * execbuffer operation, so that where there are conflicts, + * the application will get -EINVAL back. + */ + __u32 write_domain; +}; + +/** @{ + * Intel memory domains + * + * Most of these just align with the various caches in + * the system and are used to flush and invalidate as + * objects end up cached in different domains. + */ +/** CPU cache */ +#define I915_GEM_DOMAIN_CPU 0x00000001 +/** Render cache, used by 2D and 3D drawing */ +#define I915_GEM_DOMAIN_RENDER 0x00000002 +/** Sampler cache, used by texture engine */ +#define I915_GEM_DOMAIN_SAMPLER 0x00000004 +/** Command queue, used to load batch buffers */ +#define I915_GEM_DOMAIN_COMMAND 0x00000008 +/** Instruction cache, used by shader programs */ +#define I915_GEM_DOMAIN_INSTRUCTION 0x00000010 +/** Vertex address cache */ +#define I915_GEM_DOMAIN_VERTEX 0x00000020 +/** GTT domain - aperture and scanout */ +#define I915_GEM_DOMAIN_GTT 0x00000040 +/** WC domain - uncached access */ +#define I915_GEM_DOMAIN_WC 0x00000080 +/** @} */ + +struct drm_i915_gem_exec_object { + /** + * User's handle for a buffer to be bound into the GTT for this + * operation. + */ + __u32 handle; + + /** Number of relocations to be performed on this buffer */ + __u32 relocation_count; + /** + * Pointer to array of struct drm_i915_gem_relocation_entry containing + * the relocations to be performed in this buffer. + */ + __u64 relocs_ptr; + + /** Required alignment in graphics aperture */ + __u64 alignment; + + /** + * Returned value of the updated offset of the object, for future + * presumed_offset writes. + */ + __u64 offset; +}; + +/* DRM_IOCTL_I915_GEM_EXECBUFFER was removed in Linux 5.13 */ +struct drm_i915_gem_execbuffer { + /** + * List of buffers to be validated with their relocations to be + * performend on them. + * + * This is a pointer to an array of struct drm_i915_gem_validate_entry. + * + * These buffers must be listed in an order such that all relocations + * a buffer is performing refer to buffers that have already appeared + * in the validate list. + */ + __u64 buffers_ptr; + __u32 buffer_count; + + /** Offset in the batchbuffer to start execution from. */ + __u32 batch_start_offset; + /** Bytes used in batchbuffer from batch_start_offset */ + __u32 batch_len; + __u32 DR1; + __u32 DR4; + __u32 num_cliprects; + /** This is a struct drm_clip_rect *cliprects */ + __u64 cliprects_ptr; +}; + +struct drm_i915_gem_exec_object2 { + /** + * User's handle for a buffer to be bound into the GTT for this + * operation. + */ + __u32 handle; + + /** Number of relocations to be performed on this buffer */ + __u32 relocation_count; + /** + * Pointer to array of struct drm_i915_gem_relocation_entry containing + * the relocations to be performed in this buffer. + */ + __u64 relocs_ptr; + + /** Required alignment in graphics aperture */ + __u64 alignment; + + /** + * When the EXEC_OBJECT_PINNED flag is specified this is populated by + * the user with the GTT offset at which this object will be pinned. + * + * When the I915_EXEC_NO_RELOC flag is specified this must contain the + * presumed_offset of the object. + * + * During execbuffer2 the kernel populates it with the value of the + * current GTT offset of the object, for future presumed_offset writes. + * + * See struct drm_i915_gem_create_ext for the rules when dealing with + * alignment restrictions with I915_MEMORY_CLASS_DEVICE, on devices with + * minimum page sizes, like DG2. + */ + __u64 offset; + +#define EXEC_OBJECT_NEEDS_FENCE (1 << 0) +#define EXEC_OBJECT_NEEDS_GTT (1 << 1) +#define EXEC_OBJECT_WRITE (1 << 2) +#define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1 << 3) +#define EXEC_OBJECT_PINNED (1 << 4) +#define EXEC_OBJECT_PAD_TO_SIZE (1 << 5) +/* The kernel implicitly tracks GPU activity on all GEM objects, and + * synchronises operations with outstanding rendering. This includes + * rendering on other devices if exported via dma-buf. However, sometimes + * this tracking is too coarse and the user knows better. For example, + * if the object is split into non-overlapping ranges shared between different + * clients or engines (i.e. suballocating objects), the implicit tracking + * by kernel assumes that each operation affects the whole object rather + * than an individual range, causing needless synchronisation between clients. + * The kernel will also forgo any CPU cache flushes prior to rendering from + * the object as the client is expected to be also handling such domain + * tracking. + * + * The kernel maintains the implicit tracking in order to manage resources + * used by the GPU - this flag only disables the synchronisation prior to + * rendering with this object in this execbuf. + * + * Opting out of implicit synhronisation requires the user to do its own + * explicit tracking to avoid rendering corruption. See, for example, + * I915_PARAM_HAS_EXEC_FENCE to order execbufs and execute them asynchronously. + */ +#define EXEC_OBJECT_ASYNC (1 << 6) +/* Request that the contents of this execobject be copied into the error + * state upon a GPU hang involving this batch for post-mortem debugging. + * These buffers are recorded in no particular order as "user" in + * /sys/class/drm/cardN/error. Query I915_PARAM_HAS_EXEC_CAPTURE to see + * if the kernel supports this flag. + */ +#define EXEC_OBJECT_CAPTURE (1 << 7) +/* All remaining bits are MBZ and RESERVED FOR FUTURE USE */ +#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_CAPTURE << 1) + __u64 flags; + + union { + __u64 rsvd1; + __u64 pad_to_size; + }; + __u64 rsvd2; +}; + +/** + * struct drm_i915_gem_exec_fence - An input or output fence for the execbuf + * ioctl. + * + * The request will wait for input fence to signal before submission. + * + * The returned output fence will be signaled after the completion of the + * request. + */ +struct drm_i915_gem_exec_fence { + /** @handle: User's handle for a drm_syncobj to wait on or signal. */ + __u32 handle; + + /** + * @flags: Supported flags are: + * + * I915_EXEC_FENCE_WAIT: + * Wait for the input fence before request submission. + * + * I915_EXEC_FENCE_SIGNAL: + * Return request completion fence as output + */ + __u32 flags; +#define I915_EXEC_FENCE_WAIT (1 << 0) +#define I915_EXEC_FENCE_SIGNAL (1 << 1) +#define __I915_EXEC_FENCE_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_SIGNAL << 1)) +}; + +/** + * struct drm_i915_gem_execbuffer_ext_timeline_fences - Timeline fences + * for execbuf ioctl. + * + * This structure describes an array of drm_syncobj and associated points for + * timeline variants of drm_syncobj. It is invalid to append this structure to + * the execbuf if I915_EXEC_FENCE_ARRAY is set. + */ +struct drm_i915_gem_execbuffer_ext_timeline_fences { +#define DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES 0 + /** @base: Extension link. See struct i915_user_extension. */ + struct i915_user_extension base; + + /** + * @fence_count: Number of elements in the @handles_ptr & @value_ptr + * arrays. + */ + __u64 fence_count; + + /** + * @handles_ptr: Pointer to an array of struct drm_i915_gem_exec_fence + * of length @fence_count. + */ + __u64 handles_ptr; + + /** + * @values_ptr: Pointer to an array of u64 values of length + * @fence_count. + * Values must be 0 for a binary drm_syncobj. A Value of 0 for a + * timeline drm_syncobj is invalid as it turns a drm_syncobj into a + * binary one. + */ + __u64 values_ptr; +}; + +/** + * struct drm_i915_gem_execbuffer2 - Structure for DRM_I915_GEM_EXECBUFFER2 + * ioctl. + */ +struct drm_i915_gem_execbuffer2 { + /** @buffers_ptr: Pointer to a list of gem_exec_object2 structs */ + __u64 buffers_ptr; + + /** @buffer_count: Number of elements in @buffers_ptr array */ + __u32 buffer_count; + + /** + * @batch_start_offset: Offset in the batchbuffer to start execution + * from. + */ + __u32 batch_start_offset; + + /** + * @batch_len: Length in bytes of the batch buffer, starting from the + * @batch_start_offset. If 0, length is assumed to be the batch buffer + * object size. + */ + __u32 batch_len; + + /** @DR1: deprecated */ + __u32 DR1; + + /** @DR4: deprecated */ + __u32 DR4; + + /** @num_cliprects: See @cliprects_ptr */ + __u32 num_cliprects; + + /** + * @cliprects_ptr: Kernel clipping was a DRI1 misfeature. + * + * It is invalid to use this field if I915_EXEC_FENCE_ARRAY or + * I915_EXEC_USE_EXTENSIONS flags are not set. + * + * If I915_EXEC_FENCE_ARRAY is set, then this is a pointer to an array + * of &drm_i915_gem_exec_fence and @num_cliprects is the length of the + * array. + * + * If I915_EXEC_USE_EXTENSIONS is set, then this is a pointer to a + * single &i915_user_extension and num_cliprects is 0. + */ + __u64 cliprects_ptr; + + /** @flags: Execbuf flags */ + __u64 flags; +#define I915_EXEC_RING_MASK (0x3f) +#define I915_EXEC_DEFAULT (0 << 0) +#define I915_EXEC_RENDER (1 << 0) +#define I915_EXEC_BSD (2 << 0) +#define I915_EXEC_BLT (3 << 0) +#define I915_EXEC_VEBOX (4 << 0) + +/* Used for switching the constants addressing mode on gen4+ RENDER ring. + * Gen6+ only supports relative addressing to dynamic state (default) and + * absolute addressing. + * + * These flags are ignored for the BSD and BLT rings. + */ +#define I915_EXEC_CONSTANTS_MASK (3 << 6) +#define I915_EXEC_CONSTANTS_REL_GENERAL (0 << 6) /* default */ +#define I915_EXEC_CONSTANTS_ABSOLUTE (1 << 6) +#define I915_EXEC_CONSTANTS_REL_SURFACE (2 << 6) /* gen4/5 only */ + +/** Resets the SO write offset registers for transform feedback on gen7. */ +#define I915_EXEC_GEN7_SOL_RESET (1 << 8) + +/** Request a privileged ("secure") batch buffer. Note only available for + * DRM_ROOT_ONLY | DRM_MASTER processes. + */ +#define I915_EXEC_SECURE (1 << 9) + +/** Inform the kernel that the batch is and will always be pinned. This + * negates the requirement for a workaround to be performed to avoid + * an incoherent CS (such as can be found on 830/845). If this flag is + * not passed, the kernel will endeavour to make sure the batch is + * coherent with the CS before execution. If this flag is passed, + * userspace assumes the responsibility for ensuring the same. + */ +#define I915_EXEC_IS_PINNED (1 << 10) + +/** Provide a hint to the kernel that the command stream and auxiliary + * state buffers already holds the correct presumed addresses and so the + * relocation process may be skipped if no buffers need to be moved in + * preparation for the execbuffer. + */ +#define I915_EXEC_NO_RELOC (1 << 11) + +/** Use the reloc.handle as an index into the exec object array rather + * than as the per-file handle. + */ +#define I915_EXEC_HANDLE_LUT (1 << 12) + +/** Used for switching BSD rings on the platforms with two BSD rings */ +#define I915_EXEC_BSD_SHIFT (13) +#define I915_EXEC_BSD_MASK (3 << I915_EXEC_BSD_SHIFT) +/* default ping-pong mode */ +#define I915_EXEC_BSD_DEFAULT (0 << I915_EXEC_BSD_SHIFT) +#define I915_EXEC_BSD_RING1 (1 << I915_EXEC_BSD_SHIFT) +#define I915_EXEC_BSD_RING2 (2 << I915_EXEC_BSD_SHIFT) + +/** Tell the kernel that the batchbuffer is processed by + * the resource streamer. + */ +#define I915_EXEC_RESOURCE_STREAMER (1 << 15) + +/* Setting I915_EXEC_FENCE_IN implies that lower_32_bits(rsvd2) represent + * a sync_file fd to wait upon (in a nonblocking manner) prior to executing + * the batch. + * + * Returns -EINVAL if the sync_file fd cannot be found. + */ +#define I915_EXEC_FENCE_IN (1 << 16) + +/* Setting I915_EXEC_FENCE_OUT causes the ioctl to return a sync_file fd + * in the upper_32_bits(rsvd2) upon success. Ownership of the fd is given + * to the caller, and it should be close() after use. (The fd is a regular + * file descriptor and will be cleaned up on process termination. It holds + * a reference to the request, but nothing else.) + * + * The sync_file fd can be combined with other sync_file and passed either + * to execbuf using I915_EXEC_FENCE_IN, to atomic KMS ioctls (so that a flip + * will only occur after this request completes), or to other devices. + * + * Using I915_EXEC_FENCE_OUT requires use of + * DRM_IOCTL_I915_GEM_EXECBUFFER2_WR ioctl so that the result is written + * back to userspace. Failure to do so will cause the out-fence to always + * be reported as zero, and the real fence fd to be leaked. + */ +#define I915_EXEC_FENCE_OUT (1 << 17) + +/* + * Traditionally the execbuf ioctl has only considered the final element in + * the execobject[] to be the executable batch. Often though, the client + * will known the batch object prior to construction and being able to place + * it into the execobject[] array first can simplify the relocation tracking. + * Setting I915_EXEC_BATCH_FIRST tells execbuf to use element 0 of the + * execobject[] as the * batch instead (the default is to use the last + * element). + */ +#define I915_EXEC_BATCH_FIRST (1 << 18) + +/* Setting I915_FENCE_ARRAY implies that num_cliprects and cliprects_ptr + * define an array of i915_gem_exec_fence structures which specify a set of + * dma fences to wait upon or signal. + */ +#define I915_EXEC_FENCE_ARRAY (1 << 19) + +/* + * Setting I915_EXEC_FENCE_SUBMIT implies that lower_32_bits(rsvd2) represent + * a sync_file fd to wait upon (in a nonblocking manner) prior to executing + * the batch. + * + * Returns -EINVAL if the sync_file fd cannot be found. + */ +#define I915_EXEC_FENCE_SUBMIT (1 << 20) + +/* + * Setting I915_EXEC_USE_EXTENSIONS implies that + * drm_i915_gem_execbuffer2.cliprects_ptr is treated as a pointer to an linked + * list of i915_user_extension. Each i915_user_extension node is the base of a + * larger structure. The list of supported structures are listed in the + * drm_i915_gem_execbuffer_ext enum. + */ +#define I915_EXEC_USE_EXTENSIONS (1 << 21) +#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_USE_EXTENSIONS << 1)) + + /** @rsvd1: Context id */ + __u64 rsvd1; + + /** + * @rsvd2: in and out sync_file file descriptors. + * + * When I915_EXEC_FENCE_IN or I915_EXEC_FENCE_SUBMIT flag is set, the + * lower 32 bits of this field will have the in sync_file fd (input). + * + * When I915_EXEC_FENCE_OUT flag is set, the upper 32 bits of this + * field will have the out sync_file fd (output). + */ + __u64 rsvd2; +}; + +#define I915_EXEC_CONTEXT_ID_MASK (0xffffffff) +#define i915_execbuffer2_set_context_id(eb2, context) \ + (eb2).rsvd1 = context & I915_EXEC_CONTEXT_ID_MASK +#define i915_execbuffer2_get_context_id(eb2) \ + ((eb2).rsvd1 & I915_EXEC_CONTEXT_ID_MASK) + +struct drm_i915_gem_pin { + /** Handle of the buffer to be pinned. */ + __u32 handle; + __u32 pad; + + /** alignment required within the aperture */ + __u64 alignment; + + /** Returned GTT offset of the buffer. */ + __u64 offset; +}; + +struct drm_i915_gem_unpin { + /** Handle of the buffer to be unpinned. */ + __u32 handle; + __u32 pad; +}; + +struct drm_i915_gem_busy { + /** Handle of the buffer to check for busy */ + __u32 handle; + + /** Return busy status + * + * A return of 0 implies that the object is idle (after + * having flushed any pending activity), and a non-zero return that + * the object is still in-flight on the GPU. (The GPU has not yet + * signaled completion for all pending requests that reference the + * object.) An object is guaranteed to become idle eventually (so + * long as no new GPU commands are executed upon it). Due to the + * asynchronous nature of the hardware, an object reported + * as busy may become idle before the ioctl is completed. + * + * Furthermore, if the object is busy, which engine is busy is only + * provided as a guide and only indirectly by reporting its class + * (there may be more than one engine in each class). There are race + * conditions which prevent the report of which engines are busy from + * being always accurate. However, the converse is not true. If the + * object is idle, the result of the ioctl, that all engines are idle, + * is accurate. + * + * The returned dword is split into two fields to indicate both + * the engine classes on which the object is being read, and the + * engine class on which it is currently being written (if any). + * + * The low word (bits 0:15) indicate if the object is being written + * to by any engine (there can only be one, as the GEM implicit + * synchronisation rules force writes to be serialised). Only the + * engine class (offset by 1, I915_ENGINE_CLASS_RENDER is reported as + * 1 not 0 etc) for the last write is reported. + * + * The high word (bits 16:31) are a bitmask of which engines classes + * are currently reading from the object. Multiple engines may be + * reading from the object simultaneously. + * + * The value of each engine class is the same as specified in the + * I915_CONTEXT_PARAM_ENGINES context parameter and via perf, i.e. + * I915_ENGINE_CLASS_RENDER, I915_ENGINE_CLASS_COPY, etc. + * Some hardware may have parallel execution engines, e.g. multiple + * media engines, which are mapped to the same class identifier and so + * are not separately reported for busyness. + * + * Caveat emptor: + * Only the boolean result of this query is reliable; that is whether + * the object is idle or busy. The report of which engines are busy + * should be only used as a heuristic. + */ + __u32 busy; +}; + +/** + * struct drm_i915_gem_caching - Set or get the caching for given object + * handle. + * + * Allow userspace to control the GTT caching bits for a given object when the + * object is later mapped through the ppGTT(or GGTT on older platforms lacking + * ppGTT support, or if the object is used for scanout). Note that this might + * require unbinding the object from the GTT first, if its current caching value + * doesn't match. + * + * Note that this all changes on discrete platforms, starting from DG1, the + * set/get caching is no longer supported, and is now rejected. Instead the CPU + * caching attributes(WB vs WC) will become an immutable creation time property + * for the object, along with the GTT caching level. For now we don't expose any + * new uAPI for this, instead on DG1 this is all implicit, although this largely + * shouldn't matter since DG1 is coherent by default(without any way of + * controlling it). + * + * Implicit caching rules, starting from DG1: + * + * - If any of the object placements (see + * &drm_i915_gem_create_ext_memory_regions) contain I915_MEMORY_CLASS_DEVICE + * then the object will be allocated and mapped as write-combined only. + * + * - Everything else is always allocated and mapped as write-back, with the + * guarantee that everything is also coherent with the GPU. + * + * Note that this is likely to change in the future again, where we might need + * more flexibility on future devices, so making this all explicit as part of a + * new &drm_i915_gem_create_ext extension is probable. + * + * Side note: Part of the reason for this is that changing the + * at-allocation-time CPU caching attributes for the pages might be required(and + * is expensive) if we need to then CPU map the pages later with different + * caching attributes. This inconsistent caching behaviour, while supported on + * x86, is not universally supported on other architectures. So for simplicity + * we opt for setting everything at creation time, whilst also making it + * immutable, on discrete platforms. + */ +struct drm_i915_gem_caching { + /** + * @handle: Handle of the buffer to set/get the caching level. + */ + __u32 handle; + + /** + * @caching: The GTT caching level to apply or possible return value. + * + * The supported @caching values: + * + * I915_CACHING_NONE: + * + * GPU access is not coherent with CPU caches. Default for machines + * without an LLC. This means manual flushing might be needed, if we + * want GPU access to be coherent. + * + * I915_CACHING_CACHED: + * + * GPU access is coherent with CPU caches and furthermore the data is + * cached in last-level caches shared between CPU cores and the GPU GT. + * + * I915_CACHING_DISPLAY: + * + * Special GPU caching mode which is coherent with the scanout engines. + * Transparently falls back to I915_CACHING_NONE on platforms where no + * special cache mode (like write-through or gfdt flushing) is + * available. The kernel automatically sets this mode when using a + * buffer as a scanout target. Userspace can manually set this mode to + * avoid a costly stall and clflush in the hotpath of drawing the first + * frame. + */ +#define I915_CACHING_NONE 0 +#define I915_CACHING_CACHED 1 +#define I915_CACHING_DISPLAY 2 + __u32 caching; +}; + +#define I915_TILING_NONE 0 +#define I915_TILING_X 1 +#define I915_TILING_Y 2 +/* + * Do not add new tiling types here. The I915_TILING_* values are for + * de-tiling fence registers that no longer exist on modern platforms. Although + * the hardware may support new types of tiling in general (e.g., Tile4), we + * do not need to add them to the uapi that is specific to now-defunct ioctls. + */ +#define I915_TILING_LAST I915_TILING_Y + +#define I915_BIT_6_SWIZZLE_NONE 0 +#define I915_BIT_6_SWIZZLE_9 1 +#define I915_BIT_6_SWIZZLE_9_10 2 +#define I915_BIT_6_SWIZZLE_9_11 3 +#define I915_BIT_6_SWIZZLE_9_10_11 4 +/* Not seen by userland */ +#define I915_BIT_6_SWIZZLE_UNKNOWN 5 +/* Seen by userland. */ +#define I915_BIT_6_SWIZZLE_9_17 6 +#define I915_BIT_6_SWIZZLE_9_10_17 7 + +struct drm_i915_gem_set_tiling { + /** Handle of the buffer to have its tiling state updated */ + __u32 handle; + + /** + * Tiling mode for the object (I915_TILING_NONE, I915_TILING_X, + * I915_TILING_Y). + * + * This value is to be set on request, and will be updated by the + * kernel on successful return with the actual chosen tiling layout. + * + * The tiling mode may be demoted to I915_TILING_NONE when the system + * has bit 6 swizzling that can't be managed correctly by GEM. + * + * Buffer contents become undefined when changing tiling_mode. + */ + __u32 tiling_mode; + + /** + * Stride in bytes for the object when in I915_TILING_X or + * I915_TILING_Y. + */ + __u32 stride; + + /** + * Returned address bit 6 swizzling required for CPU access through + * mmap mapping. + */ + __u32 swizzle_mode; +}; + +struct drm_i915_gem_get_tiling { + /** Handle of the buffer to get tiling state for. */ + __u32 handle; + + /** + * Current tiling mode for the object (I915_TILING_NONE, I915_TILING_X, + * I915_TILING_Y). + */ + __u32 tiling_mode; + + /** + * Returned address bit 6 swizzling required for CPU access through + * mmap mapping. + */ + __u32 swizzle_mode; + + /** + * Returned address bit 6 swizzling required for CPU access through + * mmap mapping whilst bound. + */ + __u32 phys_swizzle_mode; +}; + +struct drm_i915_gem_get_aperture { + /** Total size of the aperture used by i915_gem_execbuffer, in bytes */ + __u64 aper_size; + + /** + * Available space in the aperture used by i915_gem_execbuffer, in + * bytes + */ + __u64 aper_available_size; +}; + +struct drm_i915_get_pipe_from_crtc_id { + /** ID of CRTC being requested **/ + __u32 crtc_id; + + /** pipe of requested CRTC **/ + __u32 pipe; +}; + +#define I915_MADV_WILLNEED 0 +#define I915_MADV_DONTNEED 1 +#define __I915_MADV_PURGED 2 /* internal state */ + +struct drm_i915_gem_madvise { + /** Handle of the buffer to change the backing store advice */ + __u32 handle; + + /* Advice: either the buffer will be needed again in the near future, + * or won't be and could be discarded under memory pressure. + */ + __u32 madv; + + /** Whether the backing store still exists. */ + __u32 retained; +}; + +/* flags */ +#define I915_OVERLAY_TYPE_MASK 0xff +#define I915_OVERLAY_YUV_PLANAR 0x01 +#define I915_OVERLAY_YUV_PACKED 0x02 +#define I915_OVERLAY_RGB 0x03 + +#define I915_OVERLAY_DEPTH_MASK 0xff00 +#define I915_OVERLAY_RGB24 0x1000 +#define I915_OVERLAY_RGB16 0x2000 +#define I915_OVERLAY_RGB15 0x3000 +#define I915_OVERLAY_YUV422 0x0100 +#define I915_OVERLAY_YUV411 0x0200 +#define I915_OVERLAY_YUV420 0x0300 +#define I915_OVERLAY_YUV410 0x0400 + +#define I915_OVERLAY_SWAP_MASK 0xff0000 +#define I915_OVERLAY_NO_SWAP 0x000000 +#define I915_OVERLAY_UV_SWAP 0x010000 +#define I915_OVERLAY_Y_SWAP 0x020000 +#define I915_OVERLAY_Y_AND_UV_SWAP 0x030000 + +#define I915_OVERLAY_FLAGS_MASK 0xff000000 +#define I915_OVERLAY_ENABLE 0x01000000 + +struct drm_intel_overlay_put_image { + /* various flags and src format description */ + __u32 flags; + /* source picture description */ + __u32 bo_handle; + /* stride values and offsets are in bytes, buffer relative */ + __u16 stride_Y; /* stride for packed formats */ + __u16 stride_UV; + __u32 offset_Y; /* offset for packet formats */ + __u32 offset_U; + __u32 offset_V; + /* in pixels */ + __u16 src_width; + __u16 src_height; + /* to compensate the scaling factors for partially covered surfaces */ + __u16 src_scan_width; + __u16 src_scan_height; + /* output crtc description */ + __u32 crtc_id; + __u16 dst_x; + __u16 dst_y; + __u16 dst_width; + __u16 dst_height; +}; + +/* flags */ +#define I915_OVERLAY_UPDATE_ATTRS (1 << 0) +#define I915_OVERLAY_UPDATE_GAMMA (1 << 1) +#define I915_OVERLAY_DISABLE_DEST_COLORKEY (1 << 2) +struct drm_intel_overlay_attrs { + __u32 flags; + __u32 color_key; + __s32 brightness; + __u32 contrast; + __u32 saturation; + __u32 gamma0; + __u32 gamma1; + __u32 gamma2; + __u32 gamma3; + __u32 gamma4; + __u32 gamma5; +}; + +/* + * Intel sprite handling + * + * Color keying works with a min/mask/max tuple. Both source and destination + * color keying is allowed. + * + * Source keying: + * Sprite pixels within the min & max values, masked against the color channels + * specified in the mask field, will be transparent. All other pixels will + * be displayed on top of the primary plane. For RGB surfaces, only the min + * and mask fields will be used; ranged compares are not allowed. + * + * Destination keying: + * Primary plane pixels that match the min value, masked against the color + * channels specified in the mask field, will be replaced by corresponding + * pixels from the sprite plane. + * + * Note that source & destination keying are exclusive; only one can be + * active on a given plane. + */ + +#define I915_SET_COLORKEY_NONE \ + (1 << 0) /* Deprecated. Instead set \ + * flags==0 to disable colorkeying. \ + */ +#define I915_SET_COLORKEY_DESTINATION (1 << 1) +#define I915_SET_COLORKEY_SOURCE (1 << 2) +struct drm_intel_sprite_colorkey { + __u32 plane_id; + __u32 min_value; + __u32 channel_mask; + __u32 max_value; + __u32 flags; +}; + +struct drm_i915_gem_wait { + /** Handle of BO we shall wait on */ + __u32 bo_handle; + __u32 flags; + /** Number of nanoseconds to wait, Returns time remaining. */ + __s64 timeout_ns; +}; + +struct drm_i915_gem_context_create { + __u32 ctx_id; /* output: id of new context*/ + __u32 pad; +}; + +/** + * struct drm_i915_gem_context_create_ext - Structure for creating contexts. + */ +struct drm_i915_gem_context_create_ext { + /** @ctx_id: Id of the created context (output) */ + __u32 ctx_id; + + /** + * @flags: Supported flags are: + * + * I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS: + * + * Extensions may be appended to this structure and driver must check + * for those. See @extensions. + * + * I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE + * + * Created context will have single timeline. + */ + __u32 flags; +#define I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS (1u << 0) +#define I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE (1u << 1) +#define I915_CONTEXT_CREATE_FLAGS_UNKNOWN \ + (-(I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE << 1)) + + /** + * @extensions: Zero-terminated chain of extensions. + * + * I915_CONTEXT_CREATE_EXT_SETPARAM: + * Context parameter to set or query during context creation. + * See struct drm_i915_gem_context_create_ext_setparam. + * + * I915_CONTEXT_CREATE_EXT_CLONE: + * This extension has been removed. On the off chance someone somewhere + * has attempted to use it, never re-use this extension number. + */ + __u64 extensions; +#define I915_CONTEXT_CREATE_EXT_SETPARAM 0 +#define I915_CONTEXT_CREATE_EXT_CLONE 1 +}; + +/** + * struct drm_i915_gem_context_param - Context parameter to set or query. + */ +struct drm_i915_gem_context_param { + /** @ctx_id: Context id */ + __u32 ctx_id; + + /** @size: Size of the parameter @value */ + __u32 size; + + /** @param: Parameter to set or query */ + __u64 param; +#define I915_CONTEXT_PARAM_BAN_PERIOD 0x1 +/* I915_CONTEXT_PARAM_NO_ZEROMAP has been removed. On the off chance + * someone somewhere has attempted to use it, never re-use this context + * param number. + */ +#define I915_CONTEXT_PARAM_NO_ZEROMAP 0x2 +#define I915_CONTEXT_PARAM_GTT_SIZE 0x3 +#define I915_CONTEXT_PARAM_NO_ERROR_CAPTURE 0x4 +#define I915_CONTEXT_PARAM_BANNABLE 0x5 +#define I915_CONTEXT_PARAM_PRIORITY 0x6 +#define I915_CONTEXT_MAX_USER_PRIORITY 1023 /* inclusive */ +#define I915_CONTEXT_DEFAULT_PRIORITY 0 +#define I915_CONTEXT_MIN_USER_PRIORITY -1023 /* inclusive */ + /* + * When using the following param, value should be a pointer to + * drm_i915_gem_context_param_sseu. + */ +#define I915_CONTEXT_PARAM_SSEU 0x7 + +/* + * Not all clients may want to attempt automatic recover of a context after + * a hang (for example, some clients may only submit very small incremental + * batches relying on known logical state of previous batches which will never + * recover correctly and each attempt will hang), and so would prefer that + * the context is forever banned instead. + * + * If set to false (0), after a reset, subsequent (and in flight) rendering + * from this context is discarded, and the client will need to create a new + * context to use instead. + * + * If set to true (1), the kernel will automatically attempt to recover the + * context by skipping the hanging batch and executing the next batch starting + * from the default context state (discarding the incomplete logical context + * state lost due to the reset). + * + * On creation, all new contexts are marked as recoverable. + */ +#define I915_CONTEXT_PARAM_RECOVERABLE 0x8 + + /* + * The id of the associated virtual memory address space (ppGTT) of + * this context. Can be retrieved and passed to another context + * (on the same fd) for both to use the same ppGTT and so share + * address layouts, and avoid reloading the page tables on context + * switches between themselves. + * + * See DRM_I915_GEM_VM_CREATE and DRM_I915_GEM_VM_DESTROY. + */ +#define I915_CONTEXT_PARAM_VM 0x9 + +/* + * I915_CONTEXT_PARAM_ENGINES: + * + * Bind this context to operate on this subset of available engines. Henceforth, + * the I915_EXEC_RING selector for DRM_IOCTL_I915_GEM_EXECBUFFER2 operates as + * an index into this array of engines; I915_EXEC_DEFAULT selecting engine[0] + * and upwards. Slots 0...N are filled in using the specified (class, instance). + * Use + * engine_class: I915_ENGINE_CLASS_INVALID, + * engine_instance: I915_ENGINE_CLASS_INVALID_NONE + * to specify a gap in the array that can be filled in later, e.g. by a + * virtual engine used for load balancing. + * + * Setting the number of engines bound to the context to 0, by passing a zero + * sized argument, will revert back to default settings. + * + * See struct i915_context_param_engines. + * + * Extensions: + * i915_context_engines_load_balance (I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE) + * i915_context_engines_bond (I915_CONTEXT_ENGINES_EXT_BOND) + * i915_context_engines_parallel_submit + * (I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT) + */ +#define I915_CONTEXT_PARAM_ENGINES 0xa + +/* + * I915_CONTEXT_PARAM_PERSISTENCE: + * + * Allow the context and active rendering to survive the process until + * completion. Persistence allows fire-and-forget clients to queue up a + * bunch of work, hand the output over to a display server and then quit. + * If the context is marked as not persistent, upon closing (either via + * an explicit DRM_I915_GEM_CONTEXT_DESTROY or implicitly from file closure + * or process termination), the context and any outstanding requests will be + * cancelled (and exported fences for cancelled requests marked as -EIO). + * + * By default, new contexts allow persistence. + */ +#define I915_CONTEXT_PARAM_PERSISTENCE 0xb + +/* This API has been removed. On the off chance someone somewhere has + * attempted to use it, never re-use this context param number. + */ +#define I915_CONTEXT_PARAM_RINGSIZE 0xc + +/* + * I915_CONTEXT_PARAM_PROTECTED_CONTENT: + * + * Mark that the context makes use of protected content, which will result + * in the context being invalidated when the protected content session is. + * Given that the protected content session is killed on suspend, the device + * is kept awake for the lifetime of a protected context, so the user should + * make sure to dispose of them once done. + * This flag can only be set at context creation time and, when set to true, + * must be preceded by an explicit setting of I915_CONTEXT_PARAM_RECOVERABLE + * to false. This flag can't be set to true in conjunction with setting the + * I915_CONTEXT_PARAM_BANNABLE flag to false. Creation example: + * + * .. code-block:: C + * + * struct drm_i915_gem_context_create_ext_setparam p_protected = { + * .base = { + * .name = I915_CONTEXT_CREATE_EXT_SETPARAM, + * }, + * .param = { + * .param = I915_CONTEXT_PARAM_PROTECTED_CONTENT, + * .value = 1, + * } + * }; + * struct drm_i915_gem_context_create_ext_setparam p_norecover = { + * .base = { + * .name = I915_CONTEXT_CREATE_EXT_SETPARAM, + * .next_extension = to_user_pointer(&p_protected), + * }, + * .param = { + * .param = I915_CONTEXT_PARAM_RECOVERABLE, + * .value = 0, + * } + * }; + * struct drm_i915_gem_context_create_ext create = { + * .flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS, + * .extensions = to_user_pointer(&p_norecover); + * }; + * + * ctx_id = gem_context_create_ext(drm_fd, &create); + * + * In addition to the normal failure cases, setting this flag during context + * creation can result in the following errors: + * + * -ENODEV: feature not available + * -EPERM: trying to mark a recoverable or not bannable context as protected + * -ENXIO: A dependency such as a component driver or firmware is not yet + * loaded so user space may need to attempt again. Depending on the + * device, this error may be reported if protected context creation is + * attempted very early after kernel start because the internal timeout + * waiting for such dependencies is not guaranteed to be larger than + * required (numbers differ depending on system and kernel config): + * - ADL/RPL: dependencies may take up to 3 seconds from kernel start + * while context creation internal timeout is 250 milisecs + * - MTL: dependencies may take up to 8 seconds from kernel start + * while context creation internal timeout is 250 milisecs + * NOTE: such dependencies happen once, so a subsequent call to create a + * protected context after a prior successful call will not experience + * such timeouts and will not return -ENXIO (unless the driver is + * reloaded, or, depending on the device, resumes from a suspended state). -EIO: + * The firmware did not succeed in creating the protected context. + */ +#define I915_CONTEXT_PARAM_PROTECTED_CONTENT 0xd + +/* + * I915_CONTEXT_PARAM_LOW_LATENCY: + * + * Mark this context as a low latency workload which requires aggressive GT + * frequency scaling. Use I915_PARAM_HAS_CONTEXT_FREQ_HINT to check if the + * kernel supports this per context flag. + */ +#define I915_CONTEXT_PARAM_LOW_LATENCY 0xe + +/* + * I915_CONTEXT_PARAM_CONTEXT_IMAGE: + * + * Allows userspace to provide own context images. + * + * Note that this is a debug API not available on production kernel builds. + */ +#define I915_CONTEXT_PARAM_CONTEXT_IMAGE 0xf + /* Must be kept compact -- no holes and well documented */ + + /** @value: Context parameter value to be set or queried */ + __u64 value; +}; + +/* + * Context SSEU programming + * + * It may be necessary for either functional or performance reason to configure + * a context to run with a reduced number of SSEU (where SSEU stands for Slice/ + * Sub-slice/EU). + * + * This is done by configuring SSEU configuration using the below + * @struct drm_i915_gem_context_param_sseu for every supported engine which + * userspace intends to use. + * + * Not all GPUs or engines support this functionality in which case an error + * code -ENODEV will be returned. + * + * Also, flexibility of possible SSEU configuration permutations varies between + * GPU generations and software imposed limitations. Requesting such a + * combination will return an error code of -EINVAL. + * + * NOTE: When perf/OA is active the context's SSEU configuration is ignored in + * favour of a single global setting. + */ +struct drm_i915_gem_context_param_sseu { + /* + * Engine class & instance to be configured or queried. + */ + struct i915_engine_class_instance engine; + + /* + * Unknown flags must be cleared to zero. + */ + __u32 flags; +#define I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX (1u << 0) + + /* + * Mask of slices to enable for the context. Valid values are a subset + * of the bitmask value returned for I915_PARAM_SLICE_MASK. + */ + __u64 slice_mask; + + /* + * Mask of subslices to enable for the context. Valid values are a + * subset of the bitmask value return by I915_PARAM_SUBSLICE_MASK. + */ + __u64 subslice_mask; + + /* + * Minimum/Maximum number of EUs to enable per subslice for the + * context. min_eus_per_subslice must be inferior or equal to + * max_eus_per_subslice. + */ + __u16 min_eus_per_subslice; + __u16 max_eus_per_subslice; + + /* + * Unused for now. Must be cleared to zero. + */ + __u32 rsvd; +}; + +/** + * DOC: Virtual Engine uAPI + * + * Virtual engine is a concept where userspace is able to configure a set of + * physical engines, submit a batch buffer, and let the driver execute it on any + * engine from the set as it sees fit. + * + * This is primarily useful on parts which have multiple instances of a same + * class engine, like for example GT3+ Skylake parts with their two VCS engines. + * + * For instance userspace can enumerate all engines of a certain class using the + * previously described `Engine Discovery uAPI`_. After that userspace can + * create a GEM context with a placeholder slot for the virtual engine (using + * `I915_ENGINE_CLASS_INVALID` and `I915_ENGINE_CLASS_INVALID_NONE` for class + * and instance respectively) and finally using the + * `I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE` extension place a virtual engine in + * the same reserved slot. + * + * Example of creating a virtual engine and submitting a batch buffer to it: + * + * .. code-block:: C + * + * I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(virtual, 2) = { + * .base.name = I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE, + * .engine_index = 0, // Place this virtual engine into engine map + * slot 0 .num_siblings = 2, .engines = { { I915_ENGINE_CLASS_VIDEO, 0 }, { + * I915_ENGINE_CLASS_VIDEO, 1 }, }, + * }; + * I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 1) = { + * .engines = { { I915_ENGINE_CLASS_INVALID, + * I915_ENGINE_CLASS_INVALID_NONE } }, + * .extensions = to_user_pointer(&virtual), // Chains after + * load_balance extension + * }; + * struct drm_i915_gem_context_create_ext_setparam p_engines = { + * .base = { + * .name = I915_CONTEXT_CREATE_EXT_SETPARAM, + * }, + * .param = { + * .param = I915_CONTEXT_PARAM_ENGINES, + * .value = to_user_pointer(&engines), + * .size = sizeof(engines), + * }, + * }; + * struct drm_i915_gem_context_create_ext create = { + * .flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS, + * .extensions = to_user_pointer(&p_engines); + * }; + * + * ctx_id = gem_context_create_ext(drm_fd, &create); + * + * // Now we have created a GEM context with its engine map containing a + * // single virtual engine. Submissions to this slot can go either to + * // vcs0 or vcs1, depending on the load balancing algorithm used inside + * // the driver. The load balancing is dynamic from one batch buffer to + * // another and transparent to userspace. + * + * ... + * execbuf.rsvd1 = ctx_id; + * execbuf.flags = 0; // Submits to index 0 which is the virtual engine + * gem_execbuf(drm_fd, &execbuf); + */ + +/* + * i915_context_engines_load_balance: + * + * Enable load balancing across this set of engines. + * + * Into the I915_EXEC_DEFAULT slot [0], a virtual engine is created that when + * used will proxy the execbuffer request onto one of the set of engines + * in such a way as to distribute the load evenly across the set. + * + * The set of engines must be compatible (e.g. the same HW class) as they + * will share the same logical GPU context and ring. + * + * To intermix rendering with the virtual engine and direct rendering onto + * the backing engines (bypassing the load balancing proxy), the context must + * be defined to use a single timeline for all engines. + */ +struct i915_context_engines_load_balance { + struct i915_user_extension base; + + __u16 engine_index; + __u16 num_siblings; + __u32 flags; /* all undefined flags must be zero */ + + __u64 mbz64; /* reserved for future use; must be zero */ + + struct i915_engine_class_instance engines[]; +} __attribute__((packed)); + +#define I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(name__, N__) \ + struct { \ + struct i915_user_extension base; \ + __u16 engine_index; \ + __u16 num_siblings; \ + __u32 flags; \ + __u64 mbz64; \ + struct i915_engine_class_instance engines[N__]; \ + } __attribute__((packed)) name__ + +/* + * i915_context_engines_bond: + * + * Constructed bonded pairs for execution within a virtual engine. + * + * All engines are equal, but some are more equal than others. Given + * the distribution of resources in the HW, it may be preferable to run + * a request on a given subset of engines in parallel to a request on a + * specific engine. We enable this selection of engines within a virtual + * engine by specifying bonding pairs, for any given master engine we will + * only execute on one of the corresponding siblings within the virtual engine. + * + * To execute a request in parallel on the master engine and a sibling requires + * coordination with a I915_EXEC_FENCE_SUBMIT. + */ +struct i915_context_engines_bond { + struct i915_user_extension base; + + struct i915_engine_class_instance master; + + __u16 virtual_index; /* index of virtual engine in ctx->engines[] */ + __u16 num_bonds; + + __u64 flags; /* all undefined flags must be zero */ + __u64 mbz64[4]; /* reserved for future use; must be zero */ + + struct i915_engine_class_instance engines[]; +} __attribute__((packed)); + +#define I915_DEFINE_CONTEXT_ENGINES_BOND(name__, N__) \ + struct { \ + struct i915_user_extension base; \ + struct i915_engine_class_instance master; \ + __u16 virtual_index; \ + __u16 num_bonds; \ + __u64 flags; \ + __u64 mbz64[4]; \ + struct i915_engine_class_instance engines[N__]; \ + } __attribute__((packed)) name__ + +/** + * struct i915_context_engines_parallel_submit - Configure engine for + * parallel submission. + * + * Setup a slot in the context engine map to allow multiple BBs to be submitted + * in a single execbuf IOCTL. Those BBs will then be scheduled to run on the GPU + * in parallel. Multiple hardware contexts are created internally in the i915 to + * run these BBs. Once a slot is configured for N BBs only N BBs can be + * submitted in each execbuf IOCTL and this is implicit behavior e.g. The user + * doesn't tell the execbuf IOCTL there are N BBs, the execbuf IOCTL knows how + * many BBs there are based on the slot's configuration. The N BBs are the last + * N buffer objects or first N if I915_EXEC_BATCH_FIRST is set. + * + * The default placement behavior is to create implicit bonds between each + * context if each context maps to more than 1 physical engine (e.g. context is + * a virtual engine). Also we only allow contexts of same engine class and these + * contexts must be in logically contiguous order. Examples of the placement + * behavior are described below. Lastly, the default is to not allow BBs to be + * preempted mid-batch. Rather insert coordinated preemption points on all + * hardware contexts between each set of BBs. Flags could be added in the future + * to change both of these default behaviors. + * + * Returns -EINVAL if hardware context placement configuration is invalid or if + * the placement configuration isn't supported on the platform / submission + * interface. + * Returns -ENODEV if extension isn't supported on the platform / submission + * interface. + * + * .. code-block:: none + * + * Examples syntax: + * CS[X] = generic engine of same class, logical instance X + * INVALID = I915_ENGINE_CLASS_INVALID, I915_ENGINE_CLASS_INVALID_NONE + * + * Example 1 pseudo code: + * set_engines(INVALID) + * set_parallel(engine_index=0, width=2, num_siblings=1, + * engines=CS[0],CS[1]) + * + * Results in the following valid placement: + * CS[0], CS[1] + * + * Example 2 pseudo code: + * set_engines(INVALID) + * set_parallel(engine_index=0, width=2, num_siblings=2, + * engines=CS[0],CS[2],CS[1],CS[3]) + * + * Results in the following valid placements: + * CS[0], CS[1] + * CS[2], CS[3] + * + * This can be thought of as two virtual engines, each containing two + * engines thereby making a 2D array. However, there are bonds tying the + * entries together and placing restrictions on how they can be scheduled. + * Specifically, the scheduler can choose only vertical columns from the 2D + * array. That is, CS[0] is bonded to CS[1] and CS[2] to CS[3]. So if the + * scheduler wants to submit to CS[0], it must also choose CS[1] and vice + * versa. Same for CS[2] requires also using CS[3]. + * VE[0] = CS[0], CS[2] + * VE[1] = CS[1], CS[3] + * + * Example 3 pseudo code: + * set_engines(INVALID) + * set_parallel(engine_index=0, width=2, num_siblings=2, + * engines=CS[0],CS[1],CS[1],CS[3]) + * + * Results in the following valid and invalid placements: + * CS[0], CS[1] + * CS[1], CS[3] - Not logically contiguous, return -EINVAL + */ +struct i915_context_engines_parallel_submit { + /** + * @base: base user extension. + */ + struct i915_user_extension base; + + /** + * @engine_index: slot for parallel engine + */ + __u16 engine_index; + + /** + * @width: number of contexts per parallel engine or in other words the + * number of batches in each submission + */ + __u16 width; + + /** + * @num_siblings: number of siblings per context or in other words the + * number of possible placements for each submission + */ + __u16 num_siblings; + + /** + * @mbz16: reserved for future use; must be zero + */ + __u16 mbz16; + + /** + * @flags: all undefined flags must be zero, currently not defined flags + */ + __u64 flags; + + /** + * @mbz64: reserved for future use; must be zero + */ + __u64 mbz64[3]; + + /** + * @engines: 2-d array of engine instances to configure parallel engine + * + * length = width (i) * num_siblings (j) + * index = j + i * num_siblings + */ + struct i915_engine_class_instance engines[]; + +} __attribute__((packed)); + +#define I915_DEFINE_CONTEXT_ENGINES_PARALLEL_SUBMIT(name__, N__) \ + struct { \ + struct i915_user_extension base; \ + __u16 engine_index; \ + __u16 width; \ + __u16 num_siblings; \ + __u16 mbz16; \ + __u64 flags; \ + __u64 mbz64[3]; \ + struct i915_engine_class_instance engines[N__]; \ + } __attribute__((packed)) name__ + +/** + * DOC: Context Engine Map uAPI + * + * Context engine map is a new way of addressing engines when submitting batch- + * buffers, replacing the existing way of using identifiers like `I915_EXEC_BLT` + * inside the flags field of `struct drm_i915_gem_execbuffer2`. + * + * To use it created GEM contexts need to be configured with a list of engines + * the user is intending to submit to. This is accomplished using the + * `I915_CONTEXT_PARAM_ENGINES` parameter and `struct + * i915_context_param_engines`. + * + * For such contexts the `I915_EXEC_RING_MASK` field becomes an index into the + * configured map. + * + * Example of creating such context and submitting against it: + * + * .. code-block:: C + * + * I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 2) = { + * .engines = { { I915_ENGINE_CLASS_RENDER, 0 }, + * { I915_ENGINE_CLASS_COPY, 0 } } + * }; + * struct drm_i915_gem_context_create_ext_setparam p_engines = { + * .base = { + * .name = I915_CONTEXT_CREATE_EXT_SETPARAM, + * }, + * .param = { + * .param = I915_CONTEXT_PARAM_ENGINES, + * .value = to_user_pointer(&engines), + * .size = sizeof(engines), + * }, + * }; + * struct drm_i915_gem_context_create_ext create = { + * .flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS, + * .extensions = to_user_pointer(&p_engines); + * }; + * + * ctx_id = gem_context_create_ext(drm_fd, &create); + * + * // We have now created a GEM context with two engines in the map: + * // Index 0 points to rcs0 while index 1 points to bcs0. Other engines + * // will not be accessible from this context. + * + * ... + * execbuf.rsvd1 = ctx_id; + * execbuf.flags = 0; // Submits to index 0, which is rcs0 for this context + * gem_execbuf(drm_fd, &execbuf); + * + * ... + * execbuf.rsvd1 = ctx_id; + * execbuf.flags = 1; // Submits to index 0, which is bcs0 for this context + * gem_execbuf(drm_fd, &execbuf); + */ + +struct i915_context_param_engines { + __u64 extensions; /* linked chain of extension blocks, 0 terminates */ +#define I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE \ + 0 /* see i915_context_engines_load_balance */ +#define I915_CONTEXT_ENGINES_EXT_BOND 1 /* see i915_context_engines_bond */ +#define I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT \ + 2 /* see i915_context_engines_parallel_submit */ + struct i915_engine_class_instance engines[]; +} __attribute__((packed)); + +#define I915_DEFINE_CONTEXT_PARAM_ENGINES(name__, N__) \ + struct { \ + __u64 extensions; \ + struct i915_engine_class_instance engines[N__]; \ + } __attribute__((packed)) name__ + +struct i915_gem_context_param_context_image { + /** @engine: Engine class & instance to be configured. */ + struct i915_engine_class_instance engine; + + /** @flags: One of the supported flags or zero. */ + __u32 flags; +#define I915_CONTEXT_IMAGE_FLAG_ENGINE_INDEX (1u << 0) + + /** @size: Size of the image blob pointed to by @image. */ + __u32 size; + + /** @mbz: Must be zero. */ + __u32 mbz; + + /** @image: Userspace memory containing the context image. */ + __u64 image; +} __attribute__((packed)); + +/** + * struct drm_i915_gem_context_create_ext_setparam - Context parameter + * to set or query during context creation. + */ +struct drm_i915_gem_context_create_ext_setparam { + /** @base: Extension link. See struct i915_user_extension. */ + struct i915_user_extension base; + + /** + * @param: Context parameter to set or query. + * See struct drm_i915_gem_context_param. + */ + struct drm_i915_gem_context_param param; +}; + +struct drm_i915_gem_context_destroy { + __u32 ctx_id; + __u32 pad; +}; + +/** + * struct drm_i915_gem_vm_control - Structure to create or destroy VM. + * + * DRM_I915_GEM_VM_CREATE - + * + * Create a new virtual memory address space (ppGTT) for use within a context + * on the same file. Extensions can be provided to configure exactly how the + * address space is setup upon creation. + * + * The id of new VM (bound to the fd) for use with I915_CONTEXT_PARAM_VM is + * returned in the outparam @id. + * + * An extension chain maybe provided, starting with @extensions, and terminated + * by the @next_extension being 0. Currently, no extensions are defined. + * + * DRM_I915_GEM_VM_DESTROY - + * + * Destroys a previously created VM id, specified in @vm_id. + * + * No extensions or flags are allowed currently, and so must be zero. + */ +struct drm_i915_gem_vm_control { + /** @extensions: Zero-terminated chain of extensions. */ + __u64 extensions; + + /** @flags: reserved for future usage, currently MBZ */ + __u32 flags; + + /** @vm_id: Id of the VM created or to be destroyed */ + __u32 vm_id; +}; + +struct drm_i915_reg_read { + /* + * Register offset. + * For 64bit wide registers where the upper 32bits don't immediately + * follow the lower 32bits, the offset of the lower 32bits must + * be specified + */ + __u64 offset; +#define I915_REG_READ_8B_WA (1ul << 0) + + __u64 val; /* Return value */ +}; + +/* Known registers: + * + * Render engine timestamp - 0x2358 + 64bit - gen7+ + * - Note this register returns an invalid value if using the default + * single instruction 8byte read, in order to workaround that pass + * flag I915_REG_READ_8B_WA in offset field. + * + */ + +/* + * struct drm_i915_reset_stats - Return global reset and other context stats + * + * Driver keeps few stats for each contexts and also global reset count. + * This struct can be used to query those stats. + */ +struct drm_i915_reset_stats { + /** @ctx_id: ID of the requested context */ + __u32 ctx_id; + + /** @flags: MBZ */ + __u32 flags; + + /** @reset_count: All resets since boot/module reload, for all contexts + */ + __u32 reset_count; + + /** @batch_active: Number of batches lost when active in GPU, for this + * context */ + __u32 batch_active; + + /** @batch_pending: Number of batches lost pending for execution, for + * this context */ + __u32 batch_pending; + + /** @pad: MBZ */ + __u32 pad; +}; + +/** + * struct drm_i915_gem_userptr - Create GEM object from user allocated memory. + * + * Userptr objects have several restrictions on what ioctls can be used with the + * object handle. + */ +struct drm_i915_gem_userptr { + /** + * @user_ptr: The pointer to the allocated memory. + * + * Needs to be aligned to PAGE_SIZE. + */ + __u64 user_ptr; + + /** + * @user_size: + * + * The size in bytes for the allocated memory. This will also become the + * object size. + * + * Needs to be aligned to PAGE_SIZE, and should be at least PAGE_SIZE, + * or larger. + */ + __u64 user_size; + + /** + * @flags: + * + * Supported flags: + * + * I915_USERPTR_READ_ONLY: + * + * Mark the object as readonly, this also means GPU access can only be + * readonly. This is only supported on HW which supports readonly access + * through the GTT. If the HW can't support readonly access, an error is + * returned. + * + * I915_USERPTR_PROBE: + * + * Probe the provided @user_ptr range and validate that the @user_ptr is + * indeed pointing to normal memory and that the range is also valid. + * For example if some garbage address is given to the kernel, then this + * should complain. + * + * Returns -EFAULT if the probe failed. + * + * Note that this doesn't populate the backing pages, and also doesn't + * guarantee that the object will remain valid when the object is + * eventually used. + * + * The kernel supports this feature if I915_PARAM_HAS_USERPTR_PROBE + * returns a non-zero value. + * + * I915_USERPTR_UNSYNCHRONIZED: + * + * NOT USED. Setting this flag will result in an error. + */ + __u32 flags; +#define I915_USERPTR_READ_ONLY 0x1 +#define I915_USERPTR_PROBE 0x2 +#define I915_USERPTR_UNSYNCHRONIZED 0x80000000 + /** + * @handle: Returned handle for the object. + * + * Object handles are nonzero. + */ + __u32 handle; +}; + +enum drm_i915_oa_format { + I915_OA_FORMAT_A13 = 1, /* HSW only */ + I915_OA_FORMAT_A29, /* HSW only */ + I915_OA_FORMAT_A13_B8_C8, /* HSW only */ + I915_OA_FORMAT_B4_C8, /* HSW only */ + I915_OA_FORMAT_A45_B8_C8, /* HSW only */ + I915_OA_FORMAT_B4_C8_A16, /* HSW only */ + I915_OA_FORMAT_C4_B8, /* HSW+ */ + + /* Gen8+ */ + I915_OA_FORMAT_A12, + I915_OA_FORMAT_A12_B8_C8, + I915_OA_FORMAT_A32u40_A4u32_B8_C8, + + /* DG2 */ + I915_OAR_FORMAT_A32u40_A4u32_B8_C8, + I915_OA_FORMAT_A24u40_A14u32_B8_C8, + + /* MTL OAM */ + I915_OAM_FORMAT_MPEC8u64_B8_C8, + I915_OAM_FORMAT_MPEC8u32_B8_C8, + + I915_OA_FORMAT_MAX /* non-ABI */ +}; + +enum drm_i915_perf_property_id { + /** + * Open the stream for a specific context handle (as used with + * execbuffer2). A stream opened for a specific context this way + * won't typically require root privileges. + * + * This property is available in perf revision 1. + */ + DRM_I915_PERF_PROP_CTX_HANDLE = 1, + + /** + * A value of 1 requests the inclusion of raw OA unit reports as + * part of stream samples. + * + * This property is available in perf revision 1. + */ + DRM_I915_PERF_PROP_SAMPLE_OA, + + /** + * The value specifies which set of OA unit metrics should be + * configured, defining the contents of any OA unit reports. + * + * This property is available in perf revision 1. + */ + DRM_I915_PERF_PROP_OA_METRICS_SET, + + /** + * The value specifies the size and layout of OA unit reports. + * + * This property is available in perf revision 1. + */ + DRM_I915_PERF_PROP_OA_FORMAT, + + /** + * Specifying this property implicitly requests periodic OA unit + * sampling and (at least on Haswell) the sampling frequency is derived + * from this exponent as follows: + * + * 80ns * 2^(period_exponent + 1) + * + * This property is available in perf revision 1. + */ + DRM_I915_PERF_PROP_OA_EXPONENT, + + /** + * Specifying this property is only valid when specify a context to + * filter with DRM_I915_PERF_PROP_CTX_HANDLE. Specifying this property + * will hold preemption of the particular context we want to gather + * performance data about. The execbuf2 submissions must include a + * drm_i915_gem_execbuffer_ext_perf parameter for this to apply. + * + * This property is available in perf revision 3. + */ + DRM_I915_PERF_PROP_HOLD_PREEMPTION, + + /** + * Specifying this pins all contexts to the specified SSEU power + * configuration for the duration of the recording. + * + * This parameter's value is a pointer to a struct + * drm_i915_gem_context_param_sseu. + * + * This property is available in perf revision 4. + */ + DRM_I915_PERF_PROP_GLOBAL_SSEU, + + /** + * This optional parameter specifies the timer interval in nanoseconds + * at which the i915 driver will check the OA buffer for available data. + * Minimum allowed value is 100 microseconds. A default value is used by + * the driver if this parameter is not specified. Note that larger timer + * values will reduce cpu consumption during OA perf captures. However, + * excessively large values would potentially result in OA buffer + * overwrites as captures reach end of the OA buffer. + * + * This property is available in perf revision 5. + */ + DRM_I915_PERF_PROP_POLL_OA_PERIOD, + + /** + * Multiple engines may be mapped to the same OA unit. The OA unit is + * identified by class:instance of any engine mapped to it. + * + * This parameter specifies the engine class and must be passed along + * with DRM_I915_PERF_PROP_OA_ENGINE_INSTANCE. + * + * This property is available in perf revision 6. + */ + DRM_I915_PERF_PROP_OA_ENGINE_CLASS, + + /** + * This parameter specifies the engine instance and must be passed along + * with DRM_I915_PERF_PROP_OA_ENGINE_CLASS. + * + * This property is available in perf revision 6. + */ + DRM_I915_PERF_PROP_OA_ENGINE_INSTANCE, + + DRM_I915_PERF_PROP_MAX /* non-ABI */ +}; + +struct drm_i915_perf_open_param { + __u32 flags; +#define I915_PERF_FLAG_FD_CLOEXEC (1 << 0) +#define I915_PERF_FLAG_FD_NONBLOCK (1 << 1) +#define I915_PERF_FLAG_DISABLED (1 << 2) + + /** The number of u64 (id, value) pairs */ + __u32 num_properties; + + /** + * Pointer to array of u64 (id, value) pairs configuring the stream + * to open. + */ + __u64 properties_ptr; +}; + +/* + * Enable data capture for a stream that was either opened in a disabled state + * via I915_PERF_FLAG_DISABLED or was later disabled via + * I915_PERF_IOCTL_DISABLE. + * + * It is intended to be cheaper to disable and enable a stream than it may be + * to close and re-open a stream with the same configuration. + * + * It's undefined whether any pending data for the stream will be lost. + * + * This ioctl is available in perf revision 1. + */ +#define I915_PERF_IOCTL_ENABLE _IO('i', 0x0) + +/* + * Disable data capture for a stream. + * + * It is an error to try and read a stream that is disabled. + * + * This ioctl is available in perf revision 1. + */ +#define I915_PERF_IOCTL_DISABLE _IO('i', 0x1) + +/* + * Change metrics_set captured by a stream. + * + * If the stream is bound to a specific context, the configuration change + * will performed __inline__ with that context such that it takes effect before + * the next execbuf submission. + * + * Returns the previously bound metrics set id, or a negative error code. + * + * This ioctl is available in perf revision 2. + */ +#define I915_PERF_IOCTL_CONFIG _IO('i', 0x2) + +/* + * Common to all i915 perf records + */ +struct drm_i915_perf_record_header { + __u32 type; + __u16 pad; + __u16 size; +}; + +enum drm_i915_perf_record_type { + + /** + * Samples are the work horse record type whose contents are extensible + * and defined when opening an i915 perf stream based on the given + * properties. + * + * Boolean properties following the naming convention + * DRM_I915_PERF_SAMPLE_xyz_PROP request the inclusion of 'xyz' data in + * every sample. + * + * The order of these sample properties given by userspace has no + * affect on the ordering of data within a sample. The order is + * documented here. + * + * struct { + * struct drm_i915_perf_record_header header; + * + * { u32 oa_report[]; } && DRM_I915_PERF_PROP_SAMPLE_OA + * }; + */ + DRM_I915_PERF_RECORD_SAMPLE = 1, + + /* + * Indicates that one or more OA reports were not written by the + * hardware. This can happen for example if an MI_REPORT_PERF_COUNT + * command collides with periodic sampling - which would be more likely + * at higher sampling frequencies. + */ + DRM_I915_PERF_RECORD_OA_REPORT_LOST = 2, + + /** + * An error occurred that resulted in all pending OA reports being lost. + */ + DRM_I915_PERF_RECORD_OA_BUFFER_LOST = 3, + + DRM_I915_PERF_RECORD_MAX /* non-ABI */ +}; + +/** + * struct drm_i915_perf_oa_config + * + * Structure to upload perf dynamic configuration into the kernel. + */ +struct drm_i915_perf_oa_config { + /** + * @uuid: + * + * String formatted like "%\08x-%\04x-%\04x-%\04x-%\012x" + */ + char uuid[36]; + + /** + * @n_mux_regs: + * + * Number of mux regs in &mux_regs_ptr. + */ + __u32 n_mux_regs; + + /** + * @n_boolean_regs: + * + * Number of boolean regs in &boolean_regs_ptr. + */ + __u32 n_boolean_regs; + + /** + * @n_flex_regs: + * + * Number of flex regs in &flex_regs_ptr. + */ + __u32 n_flex_regs; + + /** + * @mux_regs_ptr: + * + * Pointer to tuples of u32 values (register address, value) for mux + * registers. Expected length of buffer is (2 * sizeof(u32) * + * &n_mux_regs). + */ + __u64 mux_regs_ptr; + + /** + * @boolean_regs_ptr: + * + * Pointer to tuples of u32 values (register address, value) for mux + * registers. Expected length of buffer is (2 * sizeof(u32) * + * &n_boolean_regs). + */ + __u64 boolean_regs_ptr; + + /** + * @flex_regs_ptr: + * + * Pointer to tuples of u32 values (register address, value) for mux + * registers. Expected length of buffer is (2 * sizeof(u32) * + * &n_flex_regs). + */ + __u64 flex_regs_ptr; +}; + +/** + * struct drm_i915_query_item - An individual query for the kernel to process. + * + * The behaviour is determined by the @query_id. Note that exactly what + * @data_ptr is also depends on the specific @query_id. + */ +struct drm_i915_query_item { + /** + * @query_id: + * + * The id for this query. Currently accepted query IDs are: + * - %DRM_I915_QUERY_TOPOLOGY_INFO (see struct + * drm_i915_query_topology_info) + * - %DRM_I915_QUERY_ENGINE_INFO (see struct drm_i915_engine_info) + * - %DRM_I915_QUERY_PERF_CONFIG (see struct + * drm_i915_query_perf_config) + * - %DRM_I915_QUERY_MEMORY_REGIONS (see struct + * drm_i915_query_memory_regions) + * - %DRM_I915_QUERY_HWCONFIG_BLOB (see `GuC HWCONFIG blob uAPI`) + * - %DRM_I915_QUERY_GEOMETRY_SUBSLICES (see struct + * drm_i915_query_topology_info) + * - %DRM_I915_QUERY_GUC_SUBMISSION_VERSION (see struct + * drm_i915_query_guc_submission_version) + */ + __u64 query_id; +#define DRM_I915_QUERY_TOPOLOGY_INFO 1 +#define DRM_I915_QUERY_ENGINE_INFO 2 +#define DRM_I915_QUERY_PERF_CONFIG 3 +#define DRM_I915_QUERY_MEMORY_REGIONS 4 +#define DRM_I915_QUERY_HWCONFIG_BLOB 5 +#define DRM_I915_QUERY_GEOMETRY_SUBSLICES 6 +#define DRM_I915_QUERY_GUC_SUBMISSION_VERSION 7 + /* Must be kept compact -- no holes and well documented */ + + /** + * @length: + * + * When set to zero by userspace, this is filled with the size of the + * data to be written at the @data_ptr pointer. The kernel sets this + * value to a negative value to signal an error on a particular query + * item. + */ + __s32 length; + + /** + * @flags: + * + * When &query_id == %DRM_I915_QUERY_TOPOLOGY_INFO, must be 0. + * + * When &query_id == %DRM_I915_QUERY_PERF_CONFIG, must be one of the + * following: + * + * - %DRM_I915_QUERY_PERF_CONFIG_LIST + * - %DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID + * - %DRM_I915_QUERY_PERF_CONFIG_FOR_UUID + * + * When &query_id == %DRM_I915_QUERY_GEOMETRY_SUBSLICES must contain + * a struct i915_engine_class_instance that references a render engine. + */ + __u32 flags; +#define DRM_I915_QUERY_PERF_CONFIG_LIST 1 +#define DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID 2 +#define DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_ID 3 + + /** + * @data_ptr: + * + * Data will be written at the location pointed by @data_ptr when the + * value of @length matches the length of the data to be written by the + * kernel. + */ + __u64 data_ptr; +}; + +/** + * struct drm_i915_query - Supply an array of struct drm_i915_query_item for the + * kernel to fill out. + * + * Note that this is generally a two step process for each struct + * drm_i915_query_item in the array: + * + * 1. Call the DRM_IOCTL_I915_QUERY, giving it our array of struct + * drm_i915_query_item, with &drm_i915_query_item.length set to zero. The + * kernel will then fill in the size, in bytes, which tells userspace how + * memory it needs to allocate for the blob(say for an array of properties). + * + * 2. Next we call DRM_IOCTL_I915_QUERY again, this time with the + * &drm_i915_query_item.data_ptr equal to our newly allocated blob. Note that + * the &drm_i915_query_item.length should still be the same as what the + * kernel previously set. At this point the kernel can fill in the blob. + * + * Note that for some query items it can make sense for userspace to just pass + * in a buffer/blob equal to or larger than the required size. In this case only + * a single ioctl call is needed. For some smaller query items this can work + * quite well. + * + */ +struct drm_i915_query { + /** @num_items: The number of elements in the @items_ptr array */ + __u32 num_items; + + /** + * @flags: Unused for now. Must be cleared to zero. + */ + __u32 flags; + + /** + * @items_ptr: + * + * Pointer to an array of struct drm_i915_query_item. The number of + * array elements is @num_items. + */ + __u64 items_ptr; +}; + +/** + * struct drm_i915_query_topology_info + * + * Describes slice/subslice/EU information queried by + * %DRM_I915_QUERY_TOPOLOGY_INFO + */ +struct drm_i915_query_topology_info { + /** + * @flags: + * + * Unused for now. Must be cleared to zero. + */ + __u16 flags; + + /** + * @max_slices: + * + * The number of bits used to express the slice mask. + */ + __u16 max_slices; + + /** + * @max_subslices: + * + * The number of bits used to express the subslice mask. + */ + __u16 max_subslices; + + /** + * @max_eus_per_subslice: + * + * The number of bits in the EU mask that correspond to a single + * subslice's EUs. + */ + __u16 max_eus_per_subslice; + + /** + * @subslice_offset: + * + * Offset in data[] at which the subslice masks are stored. + */ + __u16 subslice_offset; + + /** + * @subslice_stride: + * + * Stride at which each of the subslice masks for each slice are + * stored. + */ + __u16 subslice_stride; + + /** + * @eu_offset: + * + * Offset in data[] at which the EU masks are stored. + */ + __u16 eu_offset; + + /** + * @eu_stride: + * + * Stride at which each of the EU masks for each subslice are stored. + */ + __u16 eu_stride; + + /** + * @data: + * + * Contains 3 pieces of information : + * + * - The slice mask with one bit per slice telling whether a slice is + * available. The availability of slice X can be queried with the + * following formula : + * + * .. code:: c + * + * (data[X / 8] >> (X % 8)) & 1 + * + * Starting with Xe_HP platforms, Intel hardware no longer has + * traditional slices so i915 will always report a single slice + * (hardcoded slicemask = 0x1) which contains all of the platform's + * subslices. I.e., the mask here does not reflect any of the newer + * hardware concepts such as "gslices" or "cslices" since userspace + * is capable of inferring those from the subslice mask. + * + * - The subslice mask for each slice with one bit per subslice telling + * whether a subslice is available. Starting with Gen12 we use the + * term "subslice" to refer to what the hardware documentation + * describes as a "dual-subslices." The availability of subslice Y + * in slice X can be queried with the following formula : + * + * .. code:: c + * + * (data[subslice_offset + X * subslice_stride + Y / 8] >> (Y % 8)) + * & 1 + * + * - The EU mask for each subslice in each slice, with one bit per EU + * telling whether an EU is available. The availability of EU Z in + * subslice Y in slice X can be queried with the following formula : + * + * .. code:: c + * + * (data[eu_offset + + * (X * max_subslices + Y) * eu_stride + + * Z / 8 + * ] >> (Z % 8)) & 1 + */ + __u8 data[]; +}; + +/** + * DOC: Engine Discovery uAPI + * + * Engine discovery uAPI is a way of enumerating physical engines present in a + * GPU associated with an open i915 DRM file descriptor. This supersedes the old + * way of using `DRM_IOCTL_I915_GETPARAM` and engine identifiers like + * `I915_PARAM_HAS_BLT`. + * + * The need for this interface came starting with Icelake and newer GPUs, which + * started to establish a pattern of having multiple engines of a same class, + * where not all instances were always completely functionally equivalent. + * + * Entry point for this uapi is `DRM_IOCTL_I915_QUERY` with the + * `DRM_I915_QUERY_ENGINE_INFO` as the queried item id. + * + * Example for getting the list of engines: + * + * .. code-block:: C + * + * struct drm_i915_query_engine_info *info; + * struct drm_i915_query_item item = { + * .query_id = DRM_I915_QUERY_ENGINE_INFO; + * }; + * struct drm_i915_query query = { + * .num_items = 1, + * .items_ptr = (uintptr_t)&item, + * }; + * int err, i; + * + * // First query the size of the blob we need, this needs to be large + * // enough to hold our array of engines. The kernel will fill out the + * // item.length for us, which is the number of bytes we need. + * // + * // Alternatively a large buffer can be allocated straightaway enabling + * // querying in one pass, in which case item.length should contain the + * // length of the provided buffer. + * err = ioctl(fd, DRM_IOCTL_I915_QUERY, &query); + * if (err) ... + * + * info = calloc(1, item.length); + * // Now that we allocated the required number of bytes, we call the ioctl + * // again, this time with the data_ptr pointing to our newly allocated + * // blob, which the kernel can then populate with info on all engines. + * item.data_ptr = (uintptr_t)&info; + * + * err = ioctl(fd, DRM_IOCTL_I915_QUERY, &query); + * if (err) ... + * + * // We can now access each engine in the array + * for (i = 0; i < info->num_engines; i++) { + * struct drm_i915_engine_info einfo = info->engines[i]; + * u16 class = einfo.engine.class; + * u16 instance = einfo.engine.instance; + * .... + * } + * + * free(info); + * + * Each of the enumerated engines, apart from being defined by its class and + * instance (see `struct i915_engine_class_instance`), also can have flags and + * capabilities defined as documented in i915_drm.h. + * + * For instance video engines which support HEVC encoding will have the + * `I915_VIDEO_CLASS_CAPABILITY_HEVC` capability bit set. + * + * Engine discovery only fully comes to its own when combined with the new way + * of addressing engines when submitting batch buffers using contexts with + * engine maps configured. + */ + +/** + * struct drm_i915_engine_info + * + * Describes one engine and its capabilities as known to the driver. + */ +struct drm_i915_engine_info { + /** @engine: Engine class and instance. */ + struct i915_engine_class_instance engine; + + /** @rsvd0: Reserved field. */ + __u32 rsvd0; + + /** @flags: Engine flags. */ + __u64 flags; +#define I915_ENGINE_INFO_HAS_LOGICAL_INSTANCE (1 << 0) + + /** @capabilities: Capabilities of this engine. */ + __u64 capabilities; +#define I915_VIDEO_CLASS_CAPABILITY_HEVC (1 << 0) +#define I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC (1 << 1) + + /** @logical_instance: Logical instance of engine */ + __u16 logical_instance; + + /** @rsvd1: Reserved fields. */ + __u16 rsvd1[3]; + /** @rsvd2: Reserved fields. */ + __u64 rsvd2[3]; +}; + +/** + * struct drm_i915_query_engine_info + * + * Engine info query enumerates all engines known to the driver by filling in + * an array of struct drm_i915_engine_info structures. + */ +struct drm_i915_query_engine_info { + /** @num_engines: Number of struct drm_i915_engine_info structs + * following. */ + __u32 num_engines; + + /** @rsvd: MBZ */ + __u32 rsvd[3]; + + /** @engines: Marker for drm_i915_engine_info structures. */ + struct drm_i915_engine_info engines[]; +}; + +/** + * struct drm_i915_query_perf_config + * + * Data written by the kernel with query %DRM_I915_QUERY_PERF_CONFIG and + * %DRM_I915_QUERY_GEOMETRY_SUBSLICES. + */ +struct drm_i915_query_perf_config { + union { + /** + * @n_configs: + * + * When &drm_i915_query_item.flags == + * %DRM_I915_QUERY_PERF_CONFIG_LIST, i915 sets this fields to + * the number of configurations available. + */ + __u64 n_configs; + + /** + * @config: + * + * When &drm_i915_query_item.flags == + * %DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_ID, i915 will use the + * value in this field as configuration identifier to decide + * what data to write into config_ptr. + */ + __u64 config; + + /** + * @uuid: + * + * When &drm_i915_query_item.flags == + * %DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID, i915 will use the + * value in this field as configuration identifier to decide + * what data to write into config_ptr. + * + * String formatted like "%08x-%04x-%04x-%04x-%012x" + */ + char uuid[36]; + }; + + /** + * @flags: + * + * Unused for now. Must be cleared to zero. + */ + __u32 flags; + + /** + * @data: + * + * When &drm_i915_query_item.flags == %DRM_I915_QUERY_PERF_CONFIG_LIST, + * i915 will write an array of __u64 of configuration identifiers. + * + * When &drm_i915_query_item.flags == %DRM_I915_QUERY_PERF_CONFIG_DATA, + * i915 will write a struct drm_i915_perf_oa_config. If the following + * fields of struct drm_i915_perf_oa_config are not set to 0, i915 will + * write into the associated pointers the values of submitted when the + * configuration was created : + * + * - &drm_i915_perf_oa_config.n_mux_regs + * - &drm_i915_perf_oa_config.n_boolean_regs + * - &drm_i915_perf_oa_config.n_flex_regs + */ + __u8 data[]; +}; + +/** + * enum drm_i915_gem_memory_class - Supported memory classes + */ +enum drm_i915_gem_memory_class { + /** @I915_MEMORY_CLASS_SYSTEM: System memory */ + I915_MEMORY_CLASS_SYSTEM = 0, + /** @I915_MEMORY_CLASS_DEVICE: Device local-memory */ + I915_MEMORY_CLASS_DEVICE, +}; + +/** + * struct drm_i915_gem_memory_class_instance - Identify particular memory region + */ +struct drm_i915_gem_memory_class_instance { + /** @memory_class: See enum drm_i915_gem_memory_class */ + __u16 memory_class; + + /** @memory_instance: Which instance */ + __u16 memory_instance; +}; + +/** + * struct drm_i915_memory_region_info - Describes one region as known to the + * driver. + * + * Note this is using both struct drm_i915_query_item and struct drm_i915_query. + * For this new query we are adding the new query id + * DRM_I915_QUERY_MEMORY_REGIONS at &drm_i915_query_item.query_id. + */ +struct drm_i915_memory_region_info { + /** @region: The class:instance pair encoding */ + struct drm_i915_gem_memory_class_instance region; + + /** @rsvd0: MBZ */ + __u32 rsvd0; + + /** + * @probed_size: Memory probed by the driver + * + * Note that it should not be possible to ever encounter a zero value + * here, also note that no current region type will ever return -1 here. + * Although for future region types, this might be a possibility. The + * same applies to the other size fields. + */ + __u64 probed_size; + + /** + * @unallocated_size: Estimate of memory remaining + * + * Requires CAP_PERFMON or CAP_SYS_ADMIN to get reliable accounting. + * Without this (or if this is an older kernel) the value here will + * always equal the @probed_size. Note this is only currently tracked + * for I915_MEMORY_CLASS_DEVICE regions (for other types the value here + * will always equal the @probed_size). + */ + __u64 unallocated_size; + + union { + /** @rsvd1: MBZ */ + __u64 rsvd1[8]; + struct { + /** + * @probed_cpu_visible_size: Memory probed by the driver + * that is CPU accessible. + * + * This will be always be <= @probed_size, and the + * remainder (if there is any) will not be CPU + * accessible. + * + * On systems without small BAR, the @probed_size will + * always equal the @probed_cpu_visible_size, since all + * of it will be CPU accessible. + * + * Note this is only tracked for + * I915_MEMORY_CLASS_DEVICE regions (for other types the + * value here will always equal the @probed_size). + * + * Note that if the value returned here is zero, then + * this must be an old kernel which lacks the relevant + * small-bar uAPI support (including + * I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS), but on + * such systems we should never actually end up with a + * small BAR configuration, assuming we are able to load + * the kernel module. Hence it should be safe to treat + * this the same as when @probed_cpu_visible_size == + * @probed_size. + */ + __u64 probed_cpu_visible_size; + + /** + * @unallocated_cpu_visible_size: Estimate of CPU + * visible memory remaining. + * + * Note this is only tracked for + * I915_MEMORY_CLASS_DEVICE regions (for other types the + * value here will always equal the + * @probed_cpu_visible_size). + * + * Requires CAP_PERFMON or CAP_SYS_ADMIN to get reliable + * accounting. Without this the value here will always + * equal the @probed_cpu_visible_size. Note this is only + * currently tracked for I915_MEMORY_CLASS_DEVICE + * regions (for other types the value here will also + * always equal the @probed_cpu_visible_size). + * + * If this is an older kernel the value here will be + * zero, see also @probed_cpu_visible_size. + */ + __u64 unallocated_cpu_visible_size; + }; + }; +}; + +/** + * struct drm_i915_query_memory_regions + * + * The region info query enumerates all regions known to the driver by filling + * in an array of struct drm_i915_memory_region_info structures. + * + * Example for getting the list of supported regions: + * + * .. code-block:: C + * + * struct drm_i915_query_memory_regions *info; + * struct drm_i915_query_item item = { + * .query_id = DRM_I915_QUERY_MEMORY_REGIONS; + * }; + * struct drm_i915_query query = { + * .num_items = 1, + * .items_ptr = (uintptr_t)&item, + * }; + * int err, i; + * + * // First query the size of the blob we need, this needs to be large + * // enough to hold our array of regions. The kernel will fill out the + * // item.length for us, which is the number of bytes we need. + * err = ioctl(fd, DRM_IOCTL_I915_QUERY, &query); + * if (err) ... + * + * info = calloc(1, item.length); + * // Now that we allocated the required number of bytes, we call the ioctl + * // again, this time with the data_ptr pointing to our newly allocated + * // blob, which the kernel can then populate with the all the region + * info. item.data_ptr = (uintptr_t)&info, + * + * err = ioctl(fd, DRM_IOCTL_I915_QUERY, &query); + * if (err) ... + * + * // We can now access each region in the array + * for (i = 0; i < info->num_regions; i++) { + * struct drm_i915_memory_region_info mr = info->regions[i]; + * u16 class = mr.region.class; + * u16 instance = mr.region.instance; + * + * .... + * } + * + * free(info); + */ +struct drm_i915_query_memory_regions { + /** @num_regions: Number of supported regions */ + __u32 num_regions; + + /** @rsvd: MBZ */ + __u32 rsvd[3]; + + /** @regions: Info about each supported region */ + struct drm_i915_memory_region_info regions[]; +}; + +/** + * struct drm_i915_query_guc_submission_version - query GuC submission interface + * version + */ +struct drm_i915_query_guc_submission_version { + /** @branch: Firmware branch version. */ + __u32 branch; + /** @major: Firmware major version. */ + __u32 major; + /** @minor: Firmware minor version. */ + __u32 minor; + /** @patch: Firmware patch version. */ + __u32 patch; +}; + +/** + * DOC: GuC HWCONFIG blob uAPI + * + * The GuC produces a blob with information about the current device. + * i915 reads this blob from GuC and makes it available via this uAPI. + * + * The format and meaning of the blob content are documented in the + * Programmer's Reference Manual. + */ + +/** + * struct drm_i915_gem_create_ext - Existing gem_create behaviour, with added + * extension support using struct i915_user_extension. + * + * Note that new buffer flags should be added here, at least for the stuff that + * is immutable. Previously we would have two ioctls, one to create the object + * with gem_create, and another to apply various parameters, however this + * creates some ambiguity for the params which are considered immutable. Also in + * general we're phasing out the various SET/GET ioctls. + */ +struct drm_i915_gem_create_ext { + /** + * @size: Requested size for the object. + * + * The (page-aligned) allocated size for the object will be returned. + * + * On platforms like DG2/ATS the kernel will always use 64K or larger + * pages for I915_MEMORY_CLASS_DEVICE. The kernel also requires a + * minimum of 64K GTT alignment for such objects. + * + * NOTE: Previously the ABI here required a minimum GTT alignment of 2M + * on DG2/ATS, due to how the hardware implemented 64K GTT page support, + * where we had the following complications: + * + * 1) The entire PDE (which covers a 2MB virtual address range), must + * contain only 64K PTEs, i.e mixing 4K and 64K PTEs in the same + * PDE is forbidden by the hardware. + * + * 2) We still need to support 4K PTEs for I915_MEMORY_CLASS_SYSTEM + * objects. + * + * However on actual production HW this was completely changed to now + * allow setting a TLB hint at the PTE level (see PS64), which is a lot + * more flexible than the above. With this the 2M restriction was + * dropped where we now only require 64K. + */ + __u64 size; + + /** + * @handle: Returned handle for the object. + * + * Object handles are nonzero. + */ + __u32 handle; + + /** + * @flags: Optional flags. + * + * Supported values: + * + * I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS - Signal to the kernel that + * the object will need to be accessed via the CPU. + * + * Only valid when placing objects in I915_MEMORY_CLASS_DEVICE, and only + * strictly required on configurations where some subset of the device + * memory is directly visible/mappable through the CPU (which we also + * call small BAR), like on some DG2+ systems. Note that this is quite + * undesirable, but due to various factors like the client CPU, BIOS etc + * it's something we can expect to see in the wild. See + * &drm_i915_memory_region_info.probed_cpu_visible_size for how to + * determine if this system applies. + * + * Note that one of the placements MUST be I915_MEMORY_CLASS_SYSTEM, to + * ensure the kernel can always spill the allocation to system memory, + * if the object can't be allocated in the mappable part of + * I915_MEMORY_CLASS_DEVICE. + * + * Also note that since the kernel only supports flat-CCS on objects + * that can *only* be placed in I915_MEMORY_CLASS_DEVICE, we therefore + * don't support I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS together with + * flat-CCS. + * + * Without this hint, the kernel will assume that non-mappable + * I915_MEMORY_CLASS_DEVICE is preferred for this object. Note that the + * kernel can still migrate the object to the mappable part, as a last + * resort, if userspace ever CPU faults this object, but this might be + * expensive, and so ideally should be avoided. + * + * On older kernels which lack the relevant small-bar uAPI support (see + * also &drm_i915_memory_region_info.probed_cpu_visible_size), + * usage of the flag will result in an error, but it should NEVER be + * possible to end up with a small BAR configuration, assuming we can + * also successfully load the i915 kernel module. In such cases the + * entire I915_MEMORY_CLASS_DEVICE region will be CPU accessible, and as + * such there are zero restrictions on where the object can be placed. + */ +#define I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS (1 << 0) + __u32 flags; + + /** + * @extensions: The chain of extensions to apply to this object. + * + * This will be useful in the future when we need to support several + * different extensions, and we need to apply more than one when + * creating the object. See struct i915_user_extension. + * + * If we don't supply any extensions then we get the same old gem_create + * behaviour. + * + * For I915_GEM_CREATE_EXT_MEMORY_REGIONS usage see + * struct drm_i915_gem_create_ext_memory_regions. + * + * For I915_GEM_CREATE_EXT_PROTECTED_CONTENT usage see + * struct drm_i915_gem_create_ext_protected_content. + * + * For I915_GEM_CREATE_EXT_SET_PAT usage see + * struct drm_i915_gem_create_ext_set_pat. + */ +#define I915_GEM_CREATE_EXT_MEMORY_REGIONS 0 +#define I915_GEM_CREATE_EXT_PROTECTED_CONTENT 1 +#define I915_GEM_CREATE_EXT_SET_PAT 2 + __u64 extensions; +}; + +/** + * struct drm_i915_gem_create_ext_memory_regions - The + * I915_GEM_CREATE_EXT_MEMORY_REGIONS extension. + * + * Set the object with the desired set of placements/regions in priority + * order. Each entry must be unique and supported by the device. + * + * This is provided as an array of struct drm_i915_gem_memory_class_instance, or + * an equivalent layout of class:instance pair encodings. See struct + * drm_i915_query_memory_regions and DRM_I915_QUERY_MEMORY_REGIONS for how to + * query the supported regions for a device. + * + * As an example, on discrete devices, if we wish to set the placement as + * device local-memory we can do something like: + * + * .. code-block:: C + * + * struct drm_i915_gem_memory_class_instance region_lmem = { + * .memory_class = I915_MEMORY_CLASS_DEVICE, + * .memory_instance = 0, + * }; + * struct drm_i915_gem_create_ext_memory_regions regions = { + * .base = { .name = I915_GEM_CREATE_EXT_MEMORY_REGIONS }, + * .regions = (uintptr_t)®ion_lmem, + * .num_regions = 1, + * }; + * struct drm_i915_gem_create_ext create_ext = { + * .size = 16 * PAGE_SIZE, + * .extensions = (uintptr_t)®ions, + * }; + * + * int err = ioctl(fd, DRM_IOCTL_I915_GEM_CREATE_EXT, &create_ext); + * if (err) ... + * + * At which point we get the object handle in &drm_i915_gem_create_ext.handle, + * along with the final object size in &drm_i915_gem_create_ext.size, which + * should account for any rounding up, if required. + * + * Note that userspace has no means of knowing the current backing region + * for objects where @num_regions is larger than one. The kernel will only + * ensure that the priority order of the @regions array is honoured, either + * when initially placing the object, or when moving memory around due to + * memory pressure + * + * On Flat-CCS capable HW, compression is supported for the objects residing + * in I915_MEMORY_CLASS_DEVICE. When such objects (compressed) have other + * memory class in @regions and migrated (by i915, due to memory + * constraints) to the non I915_MEMORY_CLASS_DEVICE region, then i915 needs to + * decompress the content. But i915 doesn't have the required information to + * decompress the userspace compressed objects. + * + * So i915 supports Flat-CCS, on the objects which can reside only on + * I915_MEMORY_CLASS_DEVICE regions. + */ +struct drm_i915_gem_create_ext_memory_regions { + /** @base: Extension link. See struct i915_user_extension. */ + struct i915_user_extension base; + + /** @pad: MBZ */ + __u32 pad; + /** @num_regions: Number of elements in the @regions array. */ + __u32 num_regions; + /** + * @regions: The regions/placements array. + * + * An array of struct drm_i915_gem_memory_class_instance. + */ + __u64 regions; +}; + +/** + * struct drm_i915_gem_create_ext_protected_content - The + * I915_OBJECT_PARAM_PROTECTED_CONTENT extension. + * + * If this extension is provided, buffer contents are expected to be protected + * by PXP encryption and require decryption for scan out and processing. This + * is only possible on platforms that have PXP enabled, on all other scenarios + * using this extension will cause the ioctl to fail and return -ENODEV. The + * flags parameter is reserved for future expansion and must currently be set + * to zero. + * + * The buffer contents are considered invalid after a PXP session teardown. + * + * The encryption is guaranteed to be processed correctly only if the object + * is submitted with a context created using the + * I915_CONTEXT_PARAM_PROTECTED_CONTENT flag. This will also enable extra checks + * at submission time on the validity of the objects involved. + * + * Below is an example on how to create a protected object: + * + * .. code-block:: C + * + * struct drm_i915_gem_create_ext_protected_content protected_ext = { + * .base = { .name = I915_GEM_CREATE_EXT_PROTECTED_CONTENT }, + * .flags = 0, + * }; + * struct drm_i915_gem_create_ext create_ext = { + * .size = PAGE_SIZE, + * .extensions = (uintptr_t)&protected_ext, + * }; + * + * int err = ioctl(fd, DRM_IOCTL_I915_GEM_CREATE_EXT, &create_ext); + * if (err) ... + */ +struct drm_i915_gem_create_ext_protected_content { + /** @base: Extension link. See struct i915_user_extension. */ + struct i915_user_extension base; + /** @flags: reserved for future usage, currently MBZ */ + __u32 flags; +}; + +/** + * struct drm_i915_gem_create_ext_set_pat - The + * I915_GEM_CREATE_EXT_SET_PAT extension. + * + * If this extension is provided, the specified caching policy (PAT index) is + * applied to the buffer object. + * + * Below is an example on how to create an object with specific caching policy: + * + * .. code-block:: C + * + * struct drm_i915_gem_create_ext_set_pat set_pat_ext = { + * .base = { .name = I915_GEM_CREATE_EXT_SET_PAT }, + * .pat_index = 0, + * }; + * struct drm_i915_gem_create_ext create_ext = { + * .size = PAGE_SIZE, + * .extensions = (uintptr_t)&set_pat_ext, + * }; + * + * int err = ioctl(fd, DRM_IOCTL_I915_GEM_CREATE_EXT, &create_ext); + * if (err) ... + */ +struct drm_i915_gem_create_ext_set_pat { + /** @base: Extension link. See struct i915_user_extension. */ + struct i915_user_extension base; + /** + * @pat_index: PAT index to be set + * PAT index is a bit field in Page Table Entry to control caching + * behaviors for GPU accesses. The definition of PAT index is + * platform dependent and can be found in hardware specifications, + */ + __u32 pat_index; + /** @rsvd: reserved for future use */ + __u32 rsvd; +}; + +/* ID of the protected content session managed by i915 when PXP is active */ +#define I915_PROTECTED_CONTENT_DEFAULT_SESSION 0xf + +#if defined(__cplusplus) +} +#endif + +#endif /* _I915_DRM_H_ */ diff --git a/include/arch/x86_64/drm/ivpu_accel.h b/include/arch/x86_64/drm/ivpu_accel.h new file mode 100644 index 00000000..1198ec64 --- /dev/null +++ b/include/arch/x86_64/drm/ivpu_accel.h @@ -0,0 +1,511 @@ +/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */ +/* + * Copyright (C) 2020-2025 Intel Corporation + */ + +#ifndef __UAPI_IVPU_DRM_H__ +#define __UAPI_IVPU_DRM_H__ + +#include "drm.h" + +#if defined(__cplusplus) +extern "C" { +#endif + +#define DRM_IVPU_GET_PARAM 0x00 +#define DRM_IVPU_SET_PARAM 0x01 +#define DRM_IVPU_BO_CREATE 0x02 +#define DRM_IVPU_BO_INFO 0x03 +#define DRM_IVPU_SUBMIT 0x05 +#define DRM_IVPU_BO_WAIT 0x06 +#define DRM_IVPU_METRIC_STREAMER_START 0x07 +#define DRM_IVPU_METRIC_STREAMER_STOP 0x08 +#define DRM_IVPU_METRIC_STREAMER_GET_DATA 0x09 +#define DRM_IVPU_METRIC_STREAMER_GET_INFO 0x0a +#define DRM_IVPU_CMDQ_CREATE 0x0b +#define DRM_IVPU_CMDQ_DESTROY 0x0c +#define DRM_IVPU_CMDQ_SUBMIT 0x0d + +#define DRM_IOCTL_IVPU_GET_PARAM \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_IVPU_GET_PARAM, struct drm_ivpu_param) + +#define DRM_IOCTL_IVPU_SET_PARAM \ + DRM_IOW(DRM_COMMAND_BASE + DRM_IVPU_SET_PARAM, struct drm_ivpu_param) + +#define DRM_IOCTL_IVPU_BO_CREATE \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_IVPU_BO_CREATE, \ + struct drm_ivpu_bo_create) + +#define DRM_IOCTL_IVPU_BO_INFO \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_IVPU_BO_INFO, struct drm_ivpu_bo_info) + +#define DRM_IOCTL_IVPU_SUBMIT \ + DRM_IOW(DRM_COMMAND_BASE + DRM_IVPU_SUBMIT, struct drm_ivpu_submit) + +#define DRM_IOCTL_IVPU_BO_WAIT \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_IVPU_BO_WAIT, struct drm_ivpu_bo_wait) + +#define DRM_IOCTL_IVPU_METRIC_STREAMER_START \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_IVPU_METRIC_STREAMER_START, \ + struct drm_ivpu_metric_streamer_start) + +#define DRM_IOCTL_IVPU_METRIC_STREAMER_STOP \ + DRM_IOW(DRM_COMMAND_BASE + DRM_IVPU_METRIC_STREAMER_STOP, \ + struct drm_ivpu_metric_streamer_stop) + +#define DRM_IOCTL_IVPU_METRIC_STREAMER_GET_DATA \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_IVPU_METRIC_STREAMER_GET_DATA, \ + struct drm_ivpu_metric_streamer_get_data) + +#define DRM_IOCTL_IVPU_METRIC_STREAMER_GET_INFO \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_IVPU_METRIC_STREAMER_GET_INFO, \ + struct drm_ivpu_metric_streamer_get_data) + +#define DRM_IOCTL_IVPU_CMDQ_CREATE \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_IVPU_CMDQ_CREATE, \ + struct drm_ivpu_cmdq_create) + +#define DRM_IOCTL_IVPU_CMDQ_DESTROY \ + DRM_IOW(DRM_COMMAND_BASE + DRM_IVPU_CMDQ_DESTROY, \ + struct drm_ivpu_cmdq_destroy) + +#define DRM_IOCTL_IVPU_CMDQ_SUBMIT \ + DRM_IOW(DRM_COMMAND_BASE + DRM_IVPU_CMDQ_SUBMIT, \ + struct drm_ivpu_cmdq_submit) + +/** + * DOC: contexts + * + * VPU contexts have private virtual address space, job queues and priority. + * Each context is identified by an unique ID. Context is created on open(). + */ + +#define DRM_IVPU_PARAM_DEVICE_ID 0 +#define DRM_IVPU_PARAM_DEVICE_REVISION 1 +#define DRM_IVPU_PARAM_PLATFORM_TYPE 2 +#define DRM_IVPU_PARAM_CORE_CLOCK_RATE 3 +#define DRM_IVPU_PARAM_NUM_CONTEXTS 4 +#define DRM_IVPU_PARAM_CONTEXT_BASE_ADDRESS 5 +#define DRM_IVPU_PARAM_CONTEXT_PRIORITY 6 /* Deprecated */ +#define DRM_IVPU_PARAM_CONTEXT_ID 7 +#define DRM_IVPU_PARAM_FW_API_VERSION 8 +#define DRM_IVPU_PARAM_ENGINE_HEARTBEAT 9 +#define DRM_IVPU_PARAM_UNIQUE_INFERENCE_ID 10 +#define DRM_IVPU_PARAM_TILE_CONFIG 11 +#define DRM_IVPU_PARAM_SKU 12 +#define DRM_IVPU_PARAM_CAPABILITIES 13 + +#define DRM_IVPU_PLATFORM_TYPE_SILICON 0 + +/* Deprecated, use DRM_IVPU_JOB_PRIORITY */ +#define DRM_IVPU_CONTEXT_PRIORITY_IDLE 0 +#define DRM_IVPU_CONTEXT_PRIORITY_NORMAL 1 +#define DRM_IVPU_CONTEXT_PRIORITY_FOCUS 2 +#define DRM_IVPU_CONTEXT_PRIORITY_REALTIME 3 + +#define DRM_IVPU_JOB_PRIORITY_DEFAULT 0 +#define DRM_IVPU_JOB_PRIORITY_IDLE 1 +#define DRM_IVPU_JOB_PRIORITY_NORMAL 2 +#define DRM_IVPU_JOB_PRIORITY_FOCUS 3 +#define DRM_IVPU_JOB_PRIORITY_REALTIME 4 + +/** + * DRM_IVPU_CAP_METRIC_STREAMER + * + * Metric streamer support. Provides sampling of various hardware performance + * metrics like DMA bandwidth and cache miss/hits. Can be used for profiling. + */ +#define DRM_IVPU_CAP_METRIC_STREAMER 1 +/** + * DRM_IVPU_CAP_DMA_MEMORY_RANGE + * + * Driver has capability to allocate separate memory range + * accessible by hardware DMA. + */ +#define DRM_IVPU_CAP_DMA_MEMORY_RANGE 2 +/** + * DRM_IVPU_CAP_MANAGE_CMDQ + * + * Driver supports explicit command queue operations like command queue create, + * command queue destroy and submit job on specific command queue. + */ +#define DRM_IVPU_CAP_MANAGE_CMDQ 3 + +/** + * struct drm_ivpu_param - Get/Set VPU parameters + */ +struct drm_ivpu_param { + /** + * @param: + * + * Supported params: + * + * %DRM_IVPU_PARAM_DEVICE_ID: + * PCI Device ID of the VPU device (read-only) + * + * %DRM_IVPU_PARAM_DEVICE_REVISION: + * VPU device revision (read-only) + * + * %DRM_IVPU_PARAM_PLATFORM_TYPE: + * Returns %DRM_IVPU_PLATFORM_TYPE_SILICON on real hardware or device + * specific platform type when executing on a simulator or emulator + * (read-only) + * + * %DRM_IVPU_PARAM_CORE_CLOCK_RATE: + * Maximum frequency of the NPU data processing unit clock (read-only) + * + * %DRM_IVPU_PARAM_NUM_CONTEXTS: + * Maximum number of simultaneously existing contexts (read-only) + * + * %DRM_IVPU_PARAM_CONTEXT_BASE_ADDRESS: + * Lowest VPU virtual address available in the current context + * (read-only) + * + * %DRM_IVPU_PARAM_CONTEXT_ID: + * Current context ID, always greater than 0 (read-only) + * + * %DRM_IVPU_PARAM_FW_API_VERSION: + * Firmware API version array (read-only) + * + * %DRM_IVPU_PARAM_ENGINE_HEARTBEAT: + * Heartbeat value from an engine (read-only). + * Engine ID (i.e. DRM_IVPU_ENGINE_COMPUTE) is given via index. + * + * %DRM_IVPU_PARAM_UNIQUE_INFERENCE_ID: + * Device-unique inference ID (read-only) + * + * %DRM_IVPU_PARAM_TILE_CONFIG: + * VPU tile configuration (read-only) + * + * %DRM_IVPU_PARAM_SKU: + * VPU SKU ID (read-only) + * + * %DRM_IVPU_PARAM_CAPABILITIES: + * Supported capabilities (read-only) + */ + __u32 param; + + /** @index: Index for params that have multiple instances */ + __u32 index; + + /** @value: Param value */ + __u64 value; +}; + +#define DRM_IVPU_BO_SHAVE_MEM 0x00000001 +#define DRM_IVPU_BO_HIGH_MEM DRM_IVPU_BO_SHAVE_MEM +#define DRM_IVPU_BO_MAPPABLE 0x00000002 +#define DRM_IVPU_BO_DMA_MEM 0x00000004 + +#define DRM_IVPU_BO_CACHED 0x00000000 +#define DRM_IVPU_BO_UNCACHED 0x00010000 +#define DRM_IVPU_BO_WC 0x00020000 +#define DRM_IVPU_BO_CACHE_MASK 0x00030000 + +#define DRM_IVPU_BO_FLAGS \ + (DRM_IVPU_BO_HIGH_MEM | DRM_IVPU_BO_MAPPABLE | DRM_IVPU_BO_DMA_MEM | \ + DRM_IVPU_BO_CACHE_MASK) + +/** + * struct drm_ivpu_bo_create - Create BO backed by SHMEM + * + * Create GEM buffer object allocated in SHMEM memory. + */ +struct drm_ivpu_bo_create { + /** @size: The size in bytes of the allocated memory */ + __u64 size; + + /** + * @flags: + * + * Supported flags: + * + * %DRM_IVPU_BO_HIGH_MEM: + * + * Allocate VPU address from >4GB range. + * Buffer object with vpu address >4GB can be always accessed by the + * VPU DMA engine, but some HW generation may not be able to access + * this memory from then firmware running on the VPU management + * processor. Suitable for input, output and some scratch buffers. + * + * %DRM_IVPU_BO_MAPPABLE: + * + * Buffer object can be mapped using mmap(). + * + * %DRM_IVPU_BO_CACHED: + * + * Allocated BO will be cached on host side (WB) and snooped on the VPU + * side. This is the default caching mode. + * + * %DRM_IVPU_BO_UNCACHED: + * + * Not supported. Use DRM_IVPU_BO_WC instead. + * + * %DRM_IVPU_BO_WC: + * + * Allocated BO will use write combining buffer for writes but reads + * will be uncached. + */ + __u32 flags; + + /** @handle: Returned GEM object handle */ + __u32 handle; + + /** @vpu_addr: Returned VPU virtual address */ + __u64 vpu_addr; +}; + +/** + * struct drm_ivpu_bo_info - Query buffer object info + */ +struct drm_ivpu_bo_info { + /** @handle: Handle of the queried BO */ + __u32 handle; + + /** @flags: Returned flags used to create the BO */ + __u32 flags; + + /** @vpu_addr: Returned VPU virtual address */ + __u64 vpu_addr; + + /** + * @mmap_offset: + * + * Returned offset to be used in mmap(). 0 in case the BO is not + * mappable. + */ + __u64 mmap_offset; + + /** @size: Returned GEM object size, aligned to PAGE_SIZE */ + __u64 size; +}; + +/* drm_ivpu_submit engines */ +#define DRM_IVPU_ENGINE_COMPUTE 0 +#define DRM_IVPU_ENGINE_COPY 1 /* Deprecated */ + +/** + * struct drm_ivpu_submit - Submit commands to the VPU + * + * Execute a single command buffer on a given VPU engine. + * Handles to all referenced buffer objects have to be provided in @buffers_ptr. + * + * User space may wait on job completion using %DRM_IVPU_BO_WAIT ioctl. + */ +struct drm_ivpu_submit { + /** + * @buffers_ptr: + * + * A pointer to an u32 array of GEM handles of the BOs required for this + * job. The number of elements in the array must be equal to the value + * given by @buffer_count. + * + * The first BO is the command buffer. The rest of array has to contain + * all BOs referenced from the command buffer. + */ + __u64 buffers_ptr; + + /** @buffer_count: Number of elements in the @buffers_ptr */ + __u32 buffer_count; + + /** + * @engine: Select the engine this job should be executed on + * + * %DRM_IVPU_ENGINE_COMPUTE: + * + * Performs Deep Learning Neural Compute Inference Operations + */ + __u32 engine; + + /** @flags: Reserved for future use - must be zero */ + __u32 flags; + + /** + * @commands_offset: + * + * Offset inside the first buffer in @buffers_ptr containing commands + * to be executed. The offset has to be 8-byte aligned. + */ + __u32 commands_offset; + + /** + * @priority: + * + * Priority to be set for related job command queue, can be one of the + * following: %DRM_IVPU_JOB_PRIORITY_DEFAULT %DRM_IVPU_JOB_PRIORITY_IDLE + * %DRM_IVPU_JOB_PRIORITY_NORMAL + * %DRM_IVPU_JOB_PRIORITY_FOCUS + * %DRM_IVPU_JOB_PRIORITY_REALTIME + */ + __u32 priority; +}; + +/** + * struct drm_ivpu_cmdq_submit - Submit commands to the VPU using explicit + * command queue + * + * Execute a single command buffer on a given command queue. + * Handles to all referenced buffer objects have to be provided in @buffers_ptr. + * + * User space may wait on job completion using %DRM_IVPU_BO_WAIT ioctl. + */ +struct drm_ivpu_cmdq_submit { + /** + * @buffers_ptr: + * + * A pointer to an u32 array of GEM handles of the BOs required for this + * job. The number of elements in the array must be equal to the value + * given by @buffer_count. + * + * The first BO is the command buffer. The rest of array has to contain + * all BOs referenced from the command buffer. + */ + __u64 buffers_ptr; + + /** @buffer_count: Number of elements in the @buffers_ptr */ + __u32 buffer_count; + + /** @cmdq_id: ID for the command queue where job will be submitted */ + __u32 cmdq_id; + + /** @flags: Reserved for future use - must be zero */ + __u32 flags; + + /** + * @commands_offset: + * + * Offset inside the first buffer in @buffers_ptr containing commands + * to be executed. The offset has to be 8-byte aligned. + */ + __u32 commands_offset; +}; + +/* drm_ivpu_bo_wait job status codes */ +#define DRM_IVPU_JOB_STATUS_SUCCESS 0 +#define DRM_IVPU_JOB_STATUS_ABORTED 256 + +/** + * struct drm_ivpu_bo_wait - Wait for BO to become inactive + * + * Blocks until a given buffer object becomes inactive. + * With @timeout_ms set to 0 returns immediately. + */ +struct drm_ivpu_bo_wait { + /** @handle: Handle to the buffer object to be waited on */ + __u32 handle; + + /** @flags: Reserved for future use - must be zero */ + __u32 flags; + + /** @timeout_ns: Absolute timeout in nanoseconds (may be zero) */ + __s64 timeout_ns; + + /** + * @job_status: + * + * Job status code which is updated after the job is completed. + * &DRM_IVPU_JOB_STATUS_SUCCESS or device specific error otherwise. + * Valid only if @handle points to a command buffer. + */ + __u32 job_status; + + /** @pad: Padding - must be zero */ + __u32 pad; +}; + +/** + * struct drm_ivpu_metric_streamer_start - Start collecting metric data + */ +struct drm_ivpu_metric_streamer_start { + /** @metric_group_mask: Indicates metric streamer instance */ + __u64 metric_group_mask; + /** @sampling_period_ns: Sampling period in nanoseconds */ + __u64 sampling_period_ns; + /** + * @read_period_samples: + * + * Number of samples after which user space will try to read the data. + * Reading the data after significantly longer period may cause data + * loss. + */ + __u32 read_period_samples; + /** @sample_size: Returned size of a single sample in bytes */ + __u32 sample_size; + /** @max_data_size: Returned max @data_size from + * %DRM_IOCTL_IVPU_METRIC_STREAMER_GET_DATA */ + __u32 max_data_size; +}; + +/** + * struct drm_ivpu_metric_streamer_get_data - Copy collected metric data + */ +struct drm_ivpu_metric_streamer_get_data { + /** @metric_group_mask: Indicates metric streamer instance */ + __u64 metric_group_mask; + /** @buffer_ptr: A pointer to a destination for the copied data */ + __u64 buffer_ptr; + /** @buffer_size: Size of the destination buffer */ + __u64 buffer_size; + /** + * @data_size: Returned size of copied metric data + * + * If the @buffer_size is zero, returns the amount of data ready to be + * copied. + */ + __u64 data_size; +}; + +/* Command queue flags */ +#define DRM_IVPU_CMDQ_FLAG_TURBO 0x00000001 + +/** + * struct drm_ivpu_cmdq_create - Create command queue for job submission + */ +struct drm_ivpu_cmdq_create { + /** @cmdq_id: Returned ID of created command queue */ + __u32 cmdq_id; + /** + * @priority: + * + * Priority to be set for related job command queue, can be one of the + * following: %DRM_IVPU_JOB_PRIORITY_DEFAULT %DRM_IVPU_JOB_PRIORITY_IDLE + * %DRM_IVPU_JOB_PRIORITY_NORMAL + * %DRM_IVPU_JOB_PRIORITY_FOCUS + * %DRM_IVPU_JOB_PRIORITY_REALTIME + */ + __u32 priority; + /** + * @flags: + * + * Supported flags: + * + * %DRM_IVPU_CMDQ_FLAG_TURBO + * + * Enable low-latency mode for the command queue. The NPU will maximize + * performance when executing jobs from such queue at the cost of + * increased power usage. + */ + __u32 flags; +}; + +/** + * struct drm_ivpu_cmdq_destroy - Destroy a command queue + */ +struct drm_ivpu_cmdq_destroy { + /** @cmdq_id: ID of command queue to destroy */ + __u32 cmdq_id; +}; + +/** + * struct drm_ivpu_metric_streamer_stop - Stop collecting metric data + */ +struct drm_ivpu_metric_streamer_stop { + /** @metric_group_mask: Indicates metric streamer instance */ + __u64 metric_group_mask; +}; + +#if defined(__cplusplus) +} +#endif + +#endif /* __UAPI_IVPU_DRM_H__ */ diff --git a/include/arch/x86_64/drm/lima_drm.h b/include/arch/x86_64/drm/lima_drm.h new file mode 100644 index 00000000..92917b4c --- /dev/null +++ b/include/arch/x86_64/drm/lima_drm.h @@ -0,0 +1,189 @@ +/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR MIT */ +/* Copyright 2017-2018 Qiang Yu <yuq825@gmail.com> */ + +#ifndef __LIMA_DRM_H__ +#define __LIMA_DRM_H__ + +#include "drm.h" + +#if defined(__cplusplus) +extern "C" { +#endif + +enum drm_lima_param_gpu_id { + DRM_LIMA_PARAM_GPU_ID_UNKNOWN, + DRM_LIMA_PARAM_GPU_ID_MALI400, + DRM_LIMA_PARAM_GPU_ID_MALI450, +}; + +enum drm_lima_param { + DRM_LIMA_PARAM_GPU_ID, + DRM_LIMA_PARAM_NUM_PP, + DRM_LIMA_PARAM_GP_VERSION, + DRM_LIMA_PARAM_PP_VERSION, +}; + +/** + * get various information of the GPU + */ +struct drm_lima_get_param { + __u32 param; /* in, value in enum drm_lima_param */ + __u32 pad; /* pad, must be zero */ + __u64 value; /* out, parameter value */ +}; + +/* + * heap buffer dynamically increase backup memory size when GP task fail + * due to lack of heap memory. size field of heap buffer is an up bound of + * the backup memory which can be set to a fairly large value. + */ +#define LIMA_BO_FLAG_HEAP (1 << 0) + +/** + * create a buffer for used by GPU + */ +struct drm_lima_gem_create { + __u32 size; /* in, buffer size */ + __u32 flags; /* in, buffer flags */ + __u32 handle; /* out, GEM buffer handle */ + __u32 pad; /* pad, must be zero */ +}; + +/** + * get information of a buffer + */ +struct drm_lima_gem_info { + __u32 handle; /* in, GEM buffer handle */ + __u32 va; /* out, virtual address mapped into GPU MMU */ + __u64 offset; /* out, used to mmap this buffer to CPU */ +}; + +#define LIMA_SUBMIT_BO_READ 0x01 +#define LIMA_SUBMIT_BO_WRITE 0x02 + +/* buffer information used by one task */ +struct drm_lima_gem_submit_bo { + __u32 handle; /* in, GEM buffer handle */ + __u32 flags; /* in, buffer read/write by GPU */ +}; + +#define LIMA_GP_FRAME_REG_NUM 6 + +/* frame used to setup GP for each task */ +struct drm_lima_gp_frame { + __u32 frame[LIMA_GP_FRAME_REG_NUM]; +}; + +#define LIMA_PP_FRAME_REG_NUM 23 +#define LIMA_PP_WB_REG_NUM 12 + +/* frame used to setup mali400 GPU PP for each task */ +struct drm_lima_m400_pp_frame { + __u32 frame[LIMA_PP_FRAME_REG_NUM]; + __u32 num_pp; + __u32 wb[3 * LIMA_PP_WB_REG_NUM]; + __u32 plbu_array_address[4]; + __u32 fragment_stack_address[4]; +}; + +/* frame used to setup mali450 GPU PP for each task */ +struct drm_lima_m450_pp_frame { + __u32 frame[LIMA_PP_FRAME_REG_NUM]; + __u32 num_pp; + __u32 wb[3 * LIMA_PP_WB_REG_NUM]; + __u32 use_dlbu; + __u32 _pad; + union { + __u32 plbu_array_address[8]; + __u32 dlbu_regs[4]; + }; + __u32 fragment_stack_address[8]; +}; + +#define LIMA_PIPE_GP 0x00 +#define LIMA_PIPE_PP 0x01 + +#define LIMA_SUBMIT_FLAG_EXPLICIT_FENCE (1 << 0) + +/** + * submit a task to GPU + * + * User can always merge multi sync_file and drm_syncobj + * into one drm_syncobj as in_sync[0], but we reserve + * in_sync[1] for another task's out_sync to avoid the + * export/import/merge pass when explicit sync. + */ +struct drm_lima_gem_submit { + __u32 ctx; /* in, context handle task is submitted to */ + __u32 pipe; /* in, which pipe to use, GP/PP */ + __u32 nr_bos; /* in, array length of bos field */ + __u32 frame_size; /* in, size of frame field */ + __u64 bos; /* in, array of drm_lima_gem_submit_bo */ + __u64 frame; /* in, GP/PP frame */ + __u32 flags; /* in, submit flags */ + __u32 out_sync; /* in, drm_syncobj handle used to wait task finish after + submission */ + __u32 in_sync[2]; /* in, drm_syncobj handle used to wait before start + this task */ +}; + +#define LIMA_GEM_WAIT_READ 0x01 +#define LIMA_GEM_WAIT_WRITE 0x02 + +/** + * wait pending GPU task finish of a buffer + */ +struct drm_lima_gem_wait { + __u32 handle; /* in, GEM buffer handle */ + __u32 op; /* in, CPU want to read/write this buffer */ + __s64 timeout_ns; /* in, wait timeout in absulute time */ +}; + +/** + * create a context + */ +struct drm_lima_ctx_create { + __u32 id; /* out, context handle */ + __u32 _pad; /* pad, must be zero */ +}; + +/** + * free a context + */ +struct drm_lima_ctx_free { + __u32 id; /* in, context handle */ + __u32 _pad; /* pad, must be zero */ +}; + +#define DRM_LIMA_GET_PARAM 0x00 +#define DRM_LIMA_GEM_CREATE 0x01 +#define DRM_LIMA_GEM_INFO 0x02 +#define DRM_LIMA_GEM_SUBMIT 0x03 +#define DRM_LIMA_GEM_WAIT 0x04 +#define DRM_LIMA_CTX_CREATE 0x05 +#define DRM_LIMA_CTX_FREE 0x06 + +#define DRM_IOCTL_LIMA_GET_PARAM \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_LIMA_GET_PARAM, \ + struct drm_lima_get_param) +#define DRM_IOCTL_LIMA_GEM_CREATE \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_LIMA_GEM_CREATE, \ + struct drm_lima_gem_create) +#define DRM_IOCTL_LIMA_GEM_INFO \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_LIMA_GEM_INFO, struct drm_lima_gem_info) +#define DRM_IOCTL_LIMA_GEM_SUBMIT \ + DRM_IOW(DRM_COMMAND_BASE + DRM_LIMA_GEM_SUBMIT, \ + struct drm_lima_gem_submit) +#define DRM_IOCTL_LIMA_GEM_WAIT \ + DRM_IOW(DRM_COMMAND_BASE + DRM_LIMA_GEM_WAIT, struct drm_lima_gem_wait) +#define DRM_IOCTL_LIMA_CTX_CREATE \ + DRM_IOR(DRM_COMMAND_BASE + DRM_LIMA_CTX_CREATE, \ + struct drm_lima_ctx_create) +#define DRM_IOCTL_LIMA_CTX_FREE \ + DRM_IOW(DRM_COMMAND_BASE + DRM_LIMA_CTX_FREE, struct drm_lima_ctx_free) + +#if defined(__cplusplus) +} +#endif + +#endif /* __LIMA_DRM_H__ */ diff --git a/include/arch/x86_64/drm/msm_drm.h b/include/arch/x86_64/drm/msm_drm.h new file mode 100644 index 00000000..762aef57 --- /dev/null +++ b/include/arch/x86_64/drm/msm_drm.h @@ -0,0 +1,532 @@ +/* + * Copyright (C) 2013 Red Hat + * Author: Rob Clark <robdclark@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __MSM_DRM_H__ +#define __MSM_DRM_H__ + +#include "drm.h" + +#if defined(__cplusplus) +extern "C" { +#endif + +/* Please note that modifications to all structs defined here are + * subject to backwards-compatibility constraints: + * 1) Do not use pointers, use __u64 instead for 32 bit / 64 bit + * user/kernel compatibility + * 2) Keep fields aligned to their size + * 3) Because of how drm_ioctl() works, we can add new fields at + * the end of an ioctl if some care is taken: drm_ioctl() will + * zero out the new fields at the tail of the ioctl, so a zero + * value should have a backwards compatible meaning. And for + * output params, userspace won't see the newly added output + * fields.. so that has to be somehow ok. + */ + +#define MSM_PIPE_NONE 0x00 +#define MSM_PIPE_2D0 0x01 +#define MSM_PIPE_2D1 0x02 +#define MSM_PIPE_3D0 0x10 + +/* The pipe-id just uses the lower bits, so can be OR'd with flags in + * the upper 16 bits (which could be extended further, if needed, maybe + * we extend/overload the pipe-id some day to deal with multiple rings, + * but even then I don't think we need the full lower 16 bits). + */ +#define MSM_PIPE_ID_MASK 0xffff +#define MSM_PIPE_ID(x) ((x) & MSM_PIPE_ID_MASK) +#define MSM_PIPE_FLAGS(x) ((x) & ~MSM_PIPE_ID_MASK) + +/* timeouts are specified in clock-monotonic absolute times (to simplify + * restarting interrupted ioctls). The following struct is logically the + * same as 'struct timespec' but 32/64b ABI safe. + */ +struct drm_msm_timespec { + __s64 tv_sec; /* seconds */ + __s64 tv_nsec; /* nanoseconds */ +}; + +/* Below "RO" indicates a read-only param, "WO" indicates write-only, and + * "RW" indicates a param that can be both read (GET_PARAM) and written + * (SET_PARAM) + */ +#define MSM_PARAM_GPU_ID 0x01 /* RO */ +#define MSM_PARAM_GMEM_SIZE 0x02 /* RO */ +#define MSM_PARAM_CHIP_ID 0x03 /* RO */ +#define MSM_PARAM_MAX_FREQ 0x04 /* RO */ +#define MSM_PARAM_TIMESTAMP 0x05 /* RO */ +#define MSM_PARAM_GMEM_BASE 0x06 /* RO */ +#define MSM_PARAM_PRIORITIES 0x07 /* RO: The # of priority levels */ +#define MSM_PARAM_PP_PGTABLE 0x08 /* RO: Deprecated, always returns zero */ +#define MSM_PARAM_FAULTS 0x09 /* RO */ +#define MSM_PARAM_SUSPENDS 0x0a /* RO */ +#define MSM_PARAM_SYSPROF \ + 0x0b /* WO: 1 preserves perfcntrs, 2 also disables suspend */ +#define MSM_PARAM_COMM 0x0c /* WO: override for task->comm */ +#define MSM_PARAM_CMDLINE 0x0d /* WO: override for task cmdline */ +#define MSM_PARAM_VA_START 0x0e /* RO: start of valid GPU iova range */ +#define MSM_PARAM_VA_SIZE 0x0f /* RO: size of valid GPU iova range (bytes) */ +#define MSM_PARAM_HIGHEST_BANK_BIT 0x10 /* RO */ +#define MSM_PARAM_RAYTRACING 0x11 /* RO */ +#define MSM_PARAM_UBWC_SWIZZLE 0x12 /* RO */ +#define MSM_PARAM_MACROTILE_MODE 0x13 /* RO */ +#define MSM_PARAM_UCHE_TRAP_BASE 0x14 /* RO */ +/* PRR (Partially Resident Region) is required for sparse residency: */ +#define MSM_PARAM_HAS_PRR 0x15 /* RO */ +/* MSM_PARAM_EN_VM_BIND is set to 1 to enable VM_BIND ops. + * + * With VM_BIND enabled, userspace is required to allocate iova and use the + * VM_BIND ops for map/unmap ioctls. MSM_INFO_SET_IOVA and MSM_INFO_GET_IOVA + * will be rejected. (The latter does not have a sensible meaning when a BO + * can have multiple and/or partial mappings.) + * + * With VM_BIND enabled, userspace does not include a submit_bo table in the + * SUBMIT ioctl (this will be rejected), the resident set is determined by + * the the VM_BIND ops. + * + * Enabling VM_BIND will fail on devices which do not have per-process pgtables. + * And it is not allowed to disable VM_BIND once it has been enabled. + * + * Enabling VM_BIND should be done (attempted) prior to allocating any BOs or + * submitqueues of type MSM_SUBMITQUEUE_VM_BIND. + * + * Relatedly, when VM_BIND mode is enabled, the kernel will not try to recover + * from GPU faults or failed async VM_BIND ops, in particular because it is + * difficult to communicate to userspace which op failed so that userspace + * could rewind and try again. When the VM is marked unusable, the SUBMIT + * ioctl will throw -EPIPE. + */ +#define MSM_PARAM_EN_VM_BIND 0x16 /* WO, once */ + +/* For backwards compat. The original support for preemption was based on + * a single ring per priority level so # of priority levels equals the # + * of rings. With drm/scheduler providing additional levels of priority, + * the number of priorities is greater than the # of rings. The param is + * renamed to better reflect this. + */ +#define MSM_PARAM_NR_RINGS MSM_PARAM_PRIORITIES + +struct drm_msm_param { + __u32 pipe; /* in, MSM_PIPE_x */ + __u32 param; /* in, MSM_PARAM_x */ + __u64 value; /* out (get_param) or in (set_param) */ + __u32 len; /* zero for non-pointer params */ + __u32 pad; /* must be zero */ +}; + +/* + * GEM buffers: + */ + +#define MSM_BO_SCANOUT 0x00000001 /* scanout capable */ +#define MSM_BO_GPU_READONLY 0x00000002 +/* Private buffers do not need to be explicitly listed in the SUBMIT + * ioctl, unless referenced by a drm_msm_gem_submit_cmd. Private + * buffers may NOT be imported/exported or used for scanout (or any + * other situation where buffers can be indefinitely pinned, but + * cases other than scanout are all kernel owned BOs which are not + * visible to userspace). + * + * In exchange for those constraints, all private BOs associated with + * a single context (drm_file) share a single dma_resv, and if there + * has been no eviction since the last submit, there are no per-BO + * bookeeping to do, significantly cutting the SUBMIT overhead. + */ +#define MSM_BO_NO_SHARE 0x00000004 +#define MSM_BO_CACHE_MASK 0x000f0000 +/* cache modes */ +#define MSM_BO_CACHED 0x00010000 +#define MSM_BO_WC 0x00020000 +#define MSM_BO_UNCACHED 0x00040000 /* deprecated, use MSM_BO_WC */ +#define MSM_BO_CACHED_COHERENT 0x080000 + +#define MSM_BO_FLAGS \ + (MSM_BO_SCANOUT | MSM_BO_GPU_READONLY | MSM_BO_NO_SHARE | \ + MSM_BO_CACHE_MASK) + +struct drm_msm_gem_new { + __u64 size; /* in */ + __u32 flags; /* in, mask of MSM_BO_x */ + __u32 handle; /* out */ +}; + +/* Get or set GEM buffer info. The requested value can be passed + * directly in 'value', or for data larger than 64b 'value' is a + * pointer to userspace buffer, with 'len' specifying the number of + * bytes copied into that buffer. For info returned by pointer, + * calling the GEM_INFO ioctl with null 'value' will return the + * required buffer size in 'len' + */ +#define MSM_INFO_GET_OFFSET 0x00 /* get mmap() offset, returned by value */ +#define MSM_INFO_GET_IOVA 0x01 /* get iova, returned by value */ +#define MSM_INFO_SET_NAME 0x02 /* set the debug name (by pointer) */ +#define MSM_INFO_GET_NAME 0x03 /* get debug name, returned by pointer */ +#define MSM_INFO_SET_IOVA 0x04 /* set the iova, passed by value */ +#define MSM_INFO_GET_FLAGS 0x05 /* get the MSM_BO_x flags */ +#define MSM_INFO_SET_METADATA 0x06 /* set userspace metadata */ +#define MSM_INFO_GET_METADATA 0x07 /* get userspace metadata */ + +struct drm_msm_gem_info { + __u32 handle; /* in */ + __u32 info; /* in - one of MSM_INFO_* */ + __u64 value; /* in or out */ + __u32 len; /* in or out */ + __u32 pad; +}; + +#define MSM_PREP_READ 0x01 +#define MSM_PREP_WRITE 0x02 +#define MSM_PREP_NOSYNC 0x04 +#define MSM_PREP_BOOST 0x08 + +#define MSM_PREP_FLAGS \ + (MSM_PREP_READ | MSM_PREP_WRITE | MSM_PREP_NOSYNC | MSM_PREP_BOOST | 0) + +struct drm_msm_gem_cpu_prep { + __u32 handle; /* in */ + __u32 op; /* in, mask of MSM_PREP_x */ + struct drm_msm_timespec timeout; /* in */ +}; + +struct drm_msm_gem_cpu_fini { + __u32 handle; /* in */ +}; + +/* + * Cmdstream Submission: + */ + +#define MSM_SYNCOBJ_RESET 0x00000001 /* Reset syncobj after wait. */ +#define MSM_SYNCOBJ_FLAGS (MSM_SYNCOBJ_RESET | 0) + +struct drm_msm_syncobj { + __u32 handle; /* in, syncobj handle. */ + __u32 flags; /* in, from MSM_SUBMIT_SYNCOBJ_FLAGS */ + __u64 point; /* in, timepoint for timeline syncobjs. */ +}; + +/* The value written into the cmdstream is logically: + * + * ((relocbuf->gpuaddr + reloc_offset) << shift) | or + * + * When we have GPU's w/ >32bit ptrs, it should be possible to deal + * with this by emit'ing two reloc entries with appropriate shift + * values. Or a new MSM_SUBMIT_CMD_x type would also be an option. + * + * NOTE that reloc's must be sorted by order of increasing submit_offset, + * otherwise EINVAL. + */ +struct drm_msm_gem_submit_reloc { + __u32 submit_offset; /* in, offset from submit_bo */ +#ifdef __cplusplus + __u32 _or; /* in, value OR'd with result */ +#else + __u32 or ; /* in, value OR'd with result */ +#endif + __s32 shift; /* in, amount of left shift (can be negative) */ + __u32 reloc_idx; /* in, index of reloc_bo buffer */ + __u64 reloc_offset; /* in, offset from start of reloc_bo */ +}; + +/* submit-types: + * BUF - this cmd buffer is executed normally. + * IB_TARGET_BUF - this cmd buffer is an IB target. Reloc's are + * processed normally, but the kernel does not setup an IB to + * this buffer in the first-level ringbuffer + * CTX_RESTORE_BUF - only executed if there has been a GPU context + * switch since the last SUBMIT ioctl + */ +#define MSM_SUBMIT_CMD_BUF 0x0001 +#define MSM_SUBMIT_CMD_IB_TARGET_BUF 0x0002 +#define MSM_SUBMIT_CMD_CTX_RESTORE_BUF 0x0003 +struct drm_msm_gem_submit_cmd { + __u32 type; /* in, one of MSM_SUBMIT_CMD_x */ + __u32 submit_idx; /* in, index of submit_bo cmdstream buffer */ + __u32 submit_offset; /* in, offset into submit_bo */ + __u32 size; /* in, cmdstream size */ + __u32 pad; + __u32 nr_relocs; /* in, number of submit_reloc's */ + union { + __u64 relocs; /* in, ptr to array of submit_reloc's */ + __u64 iova; /* cmdstream address (for VM_BIND contexts) */ + }; +}; + +/* Each buffer referenced elsewhere in the cmdstream submit (ie. the + * cmdstream buffer(s) themselves or reloc entries) has one (and only + * one) entry in the submit->bos[] table. + * + * As a optimization, the current buffer (gpu virtual address) can be + * passed back through the 'presumed' field. If on a subsequent reloc, + * userspace passes back a 'presumed' address that is still valid, + * then patching the cmdstream for this entry is skipped. This can + * avoid kernel needing to map/access the cmdstream bo in the common + * case. + */ +#define MSM_SUBMIT_BO_READ 0x0001 +#define MSM_SUBMIT_BO_WRITE 0x0002 +#define MSM_SUBMIT_BO_DUMP 0x0004 +#define MSM_SUBMIT_BO_NO_IMPLICIT 0x0008 + +#define MSM_SUBMIT_BO_FLAGS \ + (MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE | MSM_SUBMIT_BO_DUMP | \ + MSM_SUBMIT_BO_NO_IMPLICIT) + +struct drm_msm_gem_submit_bo { + __u32 flags; /* in, mask of MSM_SUBMIT_BO_x */ + __u32 handle; /* in, GEM handle */ + __u64 presumed; /* in/out, presumed buffer address */ +}; + +/* Valid submit ioctl flags: */ +#define MSM_SUBMIT_NO_IMPLICIT 0x80000000 /* disable implicit sync */ +#define MSM_SUBMIT_FENCE_FD_IN 0x40000000 /* enable input fence_fd */ +#define MSM_SUBMIT_FENCE_FD_OUT 0x20000000 /* enable output fence_fd */ +#define MSM_SUBMIT_SUDO 0x10000000 /* run submitted cmds from RB */ +#define MSM_SUBMIT_SYNCOBJ_IN 0x08000000 /* enable input syncobj */ +#define MSM_SUBMIT_SYNCOBJ_OUT 0x04000000 /* enable output syncobj */ +#define MSM_SUBMIT_FENCE_SN_IN 0x02000000 /* userspace passes in seqno fence \ + */ +#define MSM_SUBMIT_FLAGS \ + (MSM_SUBMIT_NO_IMPLICIT | MSM_SUBMIT_FENCE_FD_IN | \ + MSM_SUBMIT_FENCE_FD_OUT | MSM_SUBMIT_SUDO | MSM_SUBMIT_SYNCOBJ_IN | \ + MSM_SUBMIT_SYNCOBJ_OUT | MSM_SUBMIT_FENCE_SN_IN | 0) + +/* Each cmdstream submit consists of a table of buffers involved, and + * one or more cmdstream buffers. This allows for conditional execution + * (context-restore), and IB buffers needed for per tile/bin draw cmds. + */ +struct drm_msm_gem_submit { + __u32 flags; /* MSM_PIPE_x | MSM_SUBMIT_x */ + __u32 fence; /* out (or in with MSM_SUBMIT_FENCE_SN_IN flag) */ + __u32 nr_bos; /* in, number of submit_bo's */ + __u32 nr_cmds; /* in, number of submit_cmd's */ + __u64 bos; /* in, ptr to array of submit_bo's */ + __u64 cmds; /* in, ptr to array of submit_cmd's */ + __s32 fence_fd; /* in/out fence fd (see MSM_SUBMIT_FENCE_FD_IN/OUT) */ + __u32 queueid; /* in, submitqueue id */ + __u64 in_syncobjs; /* in, ptr to array of drm_msm_syncobj */ + __u64 out_syncobjs; /* in, ptr to array of drm_msm_syncobj */ + __u32 nr_in_syncobjs; /* in, number of entries in in_syncobj */ + __u32 nr_out_syncobjs; /* in, number of entries in out_syncobj. */ + __u32 syncobj_stride; /* in, stride of syncobj arrays. */ + __u32 pad; /*in, reserved for future use, always 0. */ +}; + +#define MSM_VM_BIND_OP_UNMAP 0 +#define MSM_VM_BIND_OP_MAP 1 +#define MSM_VM_BIND_OP_MAP_NULL 2 + +#define MSM_VM_BIND_OP_DUMP 1 +#define MSM_VM_BIND_OP_FLAGS (MSM_VM_BIND_OP_DUMP | 0) + +/** + * struct drm_msm_vm_bind_op - bind/unbind op to run + */ +struct drm_msm_vm_bind_op { + /** @op: one of MSM_VM_BIND_OP_x */ + __u32 op; + /** @handle: GEM object handle, MBZ for UNMAP or MAP_NULL */ + __u32 handle; + /** @obj_offset: Offset into GEM object, MBZ for UNMAP or MAP_NULL */ + __u64 obj_offset; + /** @iova: Address to operate on */ + __u64 iova; + /** @range: Number of bites to to map/unmap */ + __u64 range; + /** @flags: Bitmask of MSM_VM_BIND_OP_FLAG_x */ + __u32 flags; + /** @pad: MBZ */ + __u32 pad; +}; + +#define MSM_VM_BIND_FENCE_FD_IN 0x00000001 +#define MSM_VM_BIND_FENCE_FD_OUT 0x00000002 +#define MSM_VM_BIND_FLAGS \ + (MSM_VM_BIND_FENCE_FD_IN | MSM_VM_BIND_FENCE_FD_OUT | 0) + +/** + * struct drm_msm_vm_bind - Input of &DRM_IOCTL_MSM_VM_BIND + */ +struct drm_msm_vm_bind { + /** @flags: in, bitmask of MSM_VM_BIND_x */ + __u32 flags; + /** @nr_ops: the number of bind ops in this ioctl */ + __u32 nr_ops; + /** @fence_fd: in/out fence fd (see MSM_VM_BIND_FENCE_FD_IN/OUT) */ + __s32 fence_fd; + /** @queue_id: in, submitqueue id */ + __u32 queue_id; + /** @in_syncobjs: in, ptr to array of drm_msm_gem_syncobj */ + __u64 in_syncobjs; + /** @out_syncobjs: in, ptr to array of drm_msm_gem_syncobj */ + __u64 out_syncobjs; + /** @nr_in_syncobjs: in, number of entries in in_syncobj */ + __u32 nr_in_syncobjs; + /** @nr_out_syncobjs: in, number of entries in out_syncobj */ + __u32 nr_out_syncobjs; + /** @syncobj_stride: in, stride of syncobj arrays */ + __u32 syncobj_stride; + /** @op_stride: sizeof each struct drm_msm_vm_bind_op in @ops */ + __u32 op_stride; + union { + /** @op: used if num_ops == 1 */ + struct drm_msm_vm_bind_op op; + /** @ops: userptr to array of drm_msm_vm_bind_op if num_ops > 1 + */ + __u64 ops; + }; +}; + +#define MSM_WAIT_FENCE_BOOST 0x00000001 +#define MSM_WAIT_FENCE_FLAGS (MSM_WAIT_FENCE_BOOST | 0) + +/* The normal way to synchronize with the GPU is just to CPU_PREP on + * a buffer if you need to access it from the CPU (other cmdstream + * submission from same or other contexts, PAGE_FLIP ioctl, etc, all + * handle the required synchronization under the hood). This ioctl + * mainly just exists as a way to implement the gallium pipe_fence + * APIs without requiring a dummy bo to synchronize on. + */ +struct drm_msm_wait_fence { + __u32 fence; /* in */ + __u32 flags; /* in, bitmask of MSM_WAIT_FENCE_x */ + struct drm_msm_timespec timeout; /* in */ + __u32 queueid; /* in, submitqueue id */ +}; + +/* madvise provides a way to tell the kernel in case a buffers contents + * can be discarded under memory pressure, which is useful for userspace + * bo cache where we want to optimistically hold on to buffer allocate + * and potential mmap, but allow the pages to be discarded under memory + * pressure. + * + * Typical usage would involve madvise(DONTNEED) when buffer enters BO + * cache, and madvise(WILLNEED) if trying to recycle buffer from BO cache. + * In the WILLNEED case, 'retained' indicates to userspace whether the + * backing pages still exist. + */ +#define MSM_MADV_WILLNEED \ + 0 /* backing pages are needed, status returned in 'retained' */ +#define MSM_MADV_DONTNEED 1 /* backing pages not needed */ +#define __MSM_MADV_PURGED 2 /* internal state */ + +struct drm_msm_gem_madvise { + __u32 handle; /* in, GEM handle */ + __u32 madv; /* in, MSM_MADV_x */ + __u32 retained; /* out, whether backing store still exists */ +}; + +/* + * Draw queues allow the user to set specific submission parameter. Command + * submissions specify a specific submitqueue to use. ID 0 is reserved for + * backwards compatibility as a "default" submitqueue. + * + * Because VM_BIND async updates happen on the CPU, they must run on a + * virtual queue created with the flag MSM_SUBMITQUEUE_VM_BIND. If we had + * a way to do pgtable updates on the GPU, we could drop this restriction. + */ + +#define MSM_SUBMITQUEUE_ALLOW_PREEMPT 0x00000001 +#define MSM_SUBMITQUEUE_VM_BIND 0x00000002 /* virtual queue for VM_BIND ops */ + +#define MSM_SUBMITQUEUE_FLAGS \ + (MSM_SUBMITQUEUE_ALLOW_PREEMPT | MSM_SUBMITQUEUE_VM_BIND | 0) + +/* + * The submitqueue priority should be between 0 and MSM_PARAM_PRIORITIES-1, + * a lower numeric value is higher priority. + */ +struct drm_msm_submitqueue { + __u32 flags; /* in, MSM_SUBMITQUEUE_x */ + __u32 prio; /* in, Priority level */ + __u32 id; /* out, identifier */ +}; + +#define MSM_SUBMITQUEUE_PARAM_FAULTS 0 + +struct drm_msm_submitqueue_query { + __u64 data; + __u32 id; + __u32 param; + __u32 len; + __u32 pad; +}; + +#define DRM_MSM_GET_PARAM 0x00 +#define DRM_MSM_SET_PARAM 0x01 +#define DRM_MSM_GEM_NEW 0x02 +#define DRM_MSM_GEM_INFO 0x03 +#define DRM_MSM_GEM_CPU_PREP 0x04 +#define DRM_MSM_GEM_CPU_FINI 0x05 +#define DRM_MSM_GEM_SUBMIT 0x06 +#define DRM_MSM_WAIT_FENCE 0x07 +#define DRM_MSM_GEM_MADVISE 0x08 +/* placeholder: +#define DRM_MSM_GEM_SVM_NEW 0x09 + */ +#define DRM_MSM_SUBMITQUEUE_NEW 0x0A +#define DRM_MSM_SUBMITQUEUE_CLOSE 0x0B +#define DRM_MSM_SUBMITQUEUE_QUERY 0x0C +#define DRM_MSM_VM_BIND 0x0D + +#define DRM_IOCTL_MSM_GET_PARAM \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_GET_PARAM, struct drm_msm_param) +#define DRM_IOCTL_MSM_SET_PARAM \ + DRM_IOW(DRM_COMMAND_BASE + DRM_MSM_SET_PARAM, struct drm_msm_param) +#define DRM_IOCTL_MSM_GEM_NEW \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_GEM_NEW, struct drm_msm_gem_new) +#define DRM_IOCTL_MSM_GEM_INFO \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_GEM_INFO, struct drm_msm_gem_info) +#define DRM_IOCTL_MSM_GEM_CPU_PREP \ + DRM_IOW(DRM_COMMAND_BASE + DRM_MSM_GEM_CPU_PREP, \ + struct drm_msm_gem_cpu_prep) +#define DRM_IOCTL_MSM_GEM_CPU_FINI \ + DRM_IOW(DRM_COMMAND_BASE + DRM_MSM_GEM_CPU_FINI, \ + struct drm_msm_gem_cpu_fini) +#define DRM_IOCTL_MSM_GEM_SUBMIT \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_GEM_SUBMIT, \ + struct drm_msm_gem_submit) +#define DRM_IOCTL_MSM_WAIT_FENCE \ + DRM_IOW(DRM_COMMAND_BASE + DRM_MSM_WAIT_FENCE, \ + struct drm_msm_wait_fence) +#define DRM_IOCTL_MSM_GEM_MADVISE \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_GEM_MADVISE, \ + struct drm_msm_gem_madvise) +#define DRM_IOCTL_MSM_SUBMITQUEUE_NEW \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_SUBMITQUEUE_NEW, \ + struct drm_msm_submitqueue) +#define DRM_IOCTL_MSM_SUBMITQUEUE_CLOSE \ + DRM_IOW(DRM_COMMAND_BASE + DRM_MSM_SUBMITQUEUE_CLOSE, __u32) +#define DRM_IOCTL_MSM_SUBMITQUEUE_QUERY \ + DRM_IOW(DRM_COMMAND_BASE + DRM_MSM_SUBMITQUEUE_QUERY, \ + struct drm_msm_submitqueue_query) +#define DRM_IOCTL_MSM_VM_BIND \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_VM_BIND, struct drm_msm_vm_bind) + +#if defined(__cplusplus) +} +#endif + +#endif /* __MSM_DRM_H__ */ diff --git a/include/arch/x86_64/drm/nouveau_drm.h b/include/arch/x86_64/drm/nouveau_drm.h new file mode 100644 index 00000000..8ba0ee86 --- /dev/null +++ b/include/arch/x86_64/drm/nouveau_drm.h @@ -0,0 +1,544 @@ +/* + * Copyright 2005 Stephane Marchesin. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef __NOUVEAU_DRM_H__ +#define __NOUVEAU_DRM_H__ + +#define DRM_NOUVEAU_EVENT_NVIF 0x80000000 + +#include "drm.h" + +#if defined(__cplusplus) +extern "C" { +#endif + +#define NOUVEAU_GETPARAM_PCI_VENDOR 3 +#define NOUVEAU_GETPARAM_PCI_DEVICE 4 +#define NOUVEAU_GETPARAM_BUS_TYPE 5 +#define NOUVEAU_GETPARAM_FB_SIZE 8 +#define NOUVEAU_GETPARAM_AGP_SIZE 9 +#define NOUVEAU_GETPARAM_CHIPSET_ID 11 +#define NOUVEAU_GETPARAM_VM_VRAM_BASE 12 +#define NOUVEAU_GETPARAM_GRAPH_UNITS 13 +#define NOUVEAU_GETPARAM_PTIMER_TIME 14 +#define NOUVEAU_GETPARAM_HAS_BO_USAGE 15 +#define NOUVEAU_GETPARAM_HAS_PAGEFLIP 16 + +/* + * NOUVEAU_GETPARAM_EXEC_PUSH_MAX - query max pushes through getparam + * + * Query the maximum amount of IBs that can be pushed through a single + * &drm_nouveau_exec structure and hence a single &DRM_IOCTL_NOUVEAU_EXEC + * ioctl(). + */ +#define NOUVEAU_GETPARAM_EXEC_PUSH_MAX 17 + +/* + * NOUVEAU_GETPARAM_VRAM_BAR_SIZE - query bar size + * + * Query the VRAM BAR size. + */ +#define NOUVEAU_GETPARAM_VRAM_BAR_SIZE 18 + +/* + * NOUVEAU_GETPARAM_VRAM_USED + * + * Get remaining VRAM size. + */ +#define NOUVEAU_GETPARAM_VRAM_USED 19 + +/* + * NOUVEAU_GETPARAM_HAS_VMA_TILEMODE + * + * Query whether tile mode and PTE kind are accepted with VM allocs or not. + */ +#define NOUVEAU_GETPARAM_HAS_VMA_TILEMODE 20 + +struct drm_nouveau_getparam { + __u64 param; + __u64 value; +}; + +/* + * Those are used to support selecting the main engine used on Kepler. + * This goes into drm_nouveau_channel_alloc::tt_ctxdma_handle + */ +#define NOUVEAU_FIFO_ENGINE_GR 0x01 +#define NOUVEAU_FIFO_ENGINE_VP 0x02 +#define NOUVEAU_FIFO_ENGINE_PPP 0x04 +#define NOUVEAU_FIFO_ENGINE_BSP 0x08 +#define NOUVEAU_FIFO_ENGINE_CE 0x30 + +struct drm_nouveau_channel_alloc { + __u32 fb_ctxdma_handle; + __u32 tt_ctxdma_handle; + + __s32 channel; + __u32 pushbuf_domains; + + /* Notifier memory */ + __u32 notifier_handle; + + /* DRM-enforced subchannel assignments */ + struct { + __u32 handle; + __u32 grclass; + } subchan[8]; + __u32 nr_subchan; +}; + +struct drm_nouveau_channel_free { + __s32 channel; +}; + +struct drm_nouveau_notifierobj_alloc { + __u32 channel; + __u32 handle; + __u32 size; + __u32 offset; +}; + +struct drm_nouveau_gpuobj_free { + __s32 channel; + __u32 handle; +}; + +#define NOUVEAU_GEM_DOMAIN_CPU (1 << 0) +#define NOUVEAU_GEM_DOMAIN_VRAM (1 << 1) +#define NOUVEAU_GEM_DOMAIN_GART (1 << 2) +#define NOUVEAU_GEM_DOMAIN_MAPPABLE (1 << 3) +#define NOUVEAU_GEM_DOMAIN_COHERENT (1 << 4) +/* The BO will never be shared via import or export. */ +#define NOUVEAU_GEM_DOMAIN_NO_SHARE (1 << 5) + +#define NOUVEAU_GEM_TILE_COMP 0x00030000 /* nv50-only */ +#define NOUVEAU_GEM_TILE_LAYOUT_MASK 0x0000ff00 +#define NOUVEAU_GEM_TILE_16BPP 0x00000001 +#define NOUVEAU_GEM_TILE_32BPP 0x00000002 +#define NOUVEAU_GEM_TILE_ZETA 0x00000004 +#define NOUVEAU_GEM_TILE_NONCONTIG 0x00000008 + +struct drm_nouveau_gem_info { + __u32 handle; + __u32 domain; + __u64 size; + __u64 offset; + __u64 map_handle; + __u32 tile_mode; + __u32 tile_flags; +}; + +struct drm_nouveau_gem_new { + struct drm_nouveau_gem_info info; + __u32 channel_hint; + __u32 align; +}; + +#define NOUVEAU_GEM_MAX_BUFFERS 1024 +struct drm_nouveau_gem_pushbuf_bo_presumed { + __u32 valid; + __u32 domain; + __u64 offset; +}; + +struct drm_nouveau_gem_pushbuf_bo { + __u64 user_priv; + __u32 handle; + __u32 read_domains; + __u32 write_domains; + __u32 valid_domains; + struct drm_nouveau_gem_pushbuf_bo_presumed presumed; +}; + +#define NOUVEAU_GEM_RELOC_LOW (1 << 0) +#define NOUVEAU_GEM_RELOC_HIGH (1 << 1) +#define NOUVEAU_GEM_RELOC_OR (1 << 2) +#define NOUVEAU_GEM_MAX_RELOCS 1024 +struct drm_nouveau_gem_pushbuf_reloc { + __u32 reloc_bo_index; + __u32 reloc_bo_offset; + __u32 bo_index; + __u32 flags; + __u32 data; + __u32 vor; + __u32 tor; +}; + +#define NOUVEAU_GEM_MAX_PUSH 512 +struct drm_nouveau_gem_pushbuf_push { + __u32 bo_index; + __u32 pad; + __u64 offset; + __u64 length; +#define NOUVEAU_GEM_PUSHBUF_NO_PREFETCH (1 << 23) +}; + +struct drm_nouveau_gem_pushbuf { + __u32 channel; + __u32 nr_buffers; + __u64 buffers; + __u32 nr_relocs; + __u32 nr_push; + __u64 relocs; + __u64 push; + __u32 suffix0; + __u32 suffix1; +#define NOUVEAU_GEM_PUSHBUF_SYNC (1ULL << 0) + __u64 vram_available; + __u64 gart_available; +}; + +#define NOUVEAU_GEM_CPU_PREP_NOWAIT 0x00000001 +#define NOUVEAU_GEM_CPU_PREP_WRITE 0x00000004 +struct drm_nouveau_gem_cpu_prep { + __u32 handle; + __u32 flags; +}; + +struct drm_nouveau_gem_cpu_fini { + __u32 handle; +}; + +/** + * struct drm_nouveau_sync - sync object + * + * This structure serves as synchronization mechanism for (potentially) + * asynchronous operations such as EXEC or VM_BIND. + */ +struct drm_nouveau_sync { + /** + * @flags: the flags for a sync object + * + * The first 8 bits are used to determine the type of the sync object. + */ + __u32 flags; +#define DRM_NOUVEAU_SYNC_SYNCOBJ 0x0 +#define DRM_NOUVEAU_SYNC_TIMELINE_SYNCOBJ 0x1 +#define DRM_NOUVEAU_SYNC_TYPE_MASK 0xf + /** + * @handle: the handle of the sync object + */ + __u32 handle; + /** + * @timeline_value: + * + * The timeline point of the sync object in case the syncobj is of + * type DRM_NOUVEAU_SYNC_TIMELINE_SYNCOBJ. + */ + __u64 timeline_value; +}; + +/** + * struct drm_nouveau_vm_init - GPU VA space init structure + * + * Used to initialize the GPU's VA space for a user client, telling the kernel + * which portion of the VA space is managed by the UMD and kernel respectively. + * + * For the UMD to use the VM_BIND uAPI, this must be called before any BOs or + * channels are created; if called afterwards DRM_IOCTL_NOUVEAU_VM_INIT fails + * with -ENOSYS. + */ +struct drm_nouveau_vm_init { + /** + * @kernel_managed_addr: start address of the kernel managed VA space + * region + */ + __u64 kernel_managed_addr; + /** + * @kernel_managed_size: size of the kernel managed VA space region in + * bytes + */ + __u64 kernel_managed_size; +}; + +/** + * struct drm_nouveau_vm_bind_op - VM_BIND operation + * + * This structure represents a single VM_BIND operation. UMDs should pass + * an array of this structure via struct drm_nouveau_vm_bind's &op_ptr field. + */ +struct drm_nouveau_vm_bind_op { + /** + * @op: the operation type + * + * Supported values: + * + * %DRM_NOUVEAU_VM_BIND_OP_MAP - Map a GEM object to the GPU's VA + * space. Optionally, the &DRM_NOUVEAU_VM_BIND_SPARSE flag can be + * passed to instruct the kernel to create sparse mappings for the + * given range. + * + * %DRM_NOUVEAU_VM_BIND_OP_UNMAP - Unmap an existing mapping in the + * GPU's VA space. If the region the mapping is located in is a + * sparse region, new sparse mappings are created where the unmapped + * (memory backed) mapping was mapped previously. To remove a sparse + * region the &DRM_NOUVEAU_VM_BIND_SPARSE must be set. + */ + __u32 op; +#define DRM_NOUVEAU_VM_BIND_OP_MAP 0x0 +#define DRM_NOUVEAU_VM_BIND_OP_UNMAP 0x1 + /** + * @flags: the flags for a &drm_nouveau_vm_bind_op + * + * Supported values: + * + * %DRM_NOUVEAU_VM_BIND_SPARSE - Indicates that an allocated VA + * space region should be sparse. + */ + __u32 flags; +#define DRM_NOUVEAU_VM_BIND_SPARSE (1 << 8) + /** + * @handle: the handle of the DRM GEM object to map + */ + __u32 handle; + /** + * @pad: 32 bit padding, should be 0 + */ + __u32 pad; + /** + * @addr: + * + * the address the VA space region or (memory backed) mapping should be + * mapped to + */ + __u64 addr; + /** + * @bo_offset: the offset within the BO backing the mapping + */ + __u64 bo_offset; + /** + * @range: the size of the requested mapping in bytes + */ + __u64 range; +}; + +/** + * struct drm_nouveau_vm_bind - structure for DRM_IOCTL_NOUVEAU_VM_BIND + */ +struct drm_nouveau_vm_bind { + /** + * @op_count: the number of &drm_nouveau_vm_bind_op + */ + __u32 op_count; + /** + * @flags: the flags for a &drm_nouveau_vm_bind ioctl + * + * Supported values: + * + * %DRM_NOUVEAU_VM_BIND_RUN_ASYNC - Indicates that the given VM_BIND + * operation should be executed asynchronously by the kernel. + * + * If this flag is not supplied the kernel executes the associated + * operations synchronously and doesn't accept any &drm_nouveau_sync + * objects. + */ + __u32 flags; +#define DRM_NOUVEAU_VM_BIND_RUN_ASYNC 0x1 + /** + * @wait_count: the number of wait &drm_nouveau_syncs + */ + __u32 wait_count; + /** + * @sig_count: the number of &drm_nouveau_syncs to signal when finished + */ + __u32 sig_count; + /** + * @wait_ptr: pointer to &drm_nouveau_syncs to wait for + */ + __u64 wait_ptr; + /** + * @sig_ptr: pointer to &drm_nouveau_syncs to signal when finished + */ + __u64 sig_ptr; + /** + * @op_ptr: pointer to the &drm_nouveau_vm_bind_ops to execute + */ + __u64 op_ptr; +}; + +/** + * struct drm_nouveau_exec_push - EXEC push operation + * + * This structure represents a single EXEC push operation. UMDs should pass an + * array of this structure via struct drm_nouveau_exec's &push_ptr field. + */ +struct drm_nouveau_exec_push { + /** + * @va: the virtual address of the push buffer mapping + */ + __u64 va; + /** + * @va_len: the length of the push buffer mapping + */ + __u32 va_len; + /** + * @flags: the flags for this push buffer mapping + */ + __u32 flags; +#define DRM_NOUVEAU_EXEC_PUSH_NO_PREFETCH 0x1 +}; + +/** + * struct drm_nouveau_exec - structure for DRM_IOCTL_NOUVEAU_EXEC + */ +struct drm_nouveau_exec { + /** + * @channel: the channel to execute the push buffer in + */ + __u32 channel; + /** + * @push_count: the number of &drm_nouveau_exec_push ops + */ + __u32 push_count; + /** + * @wait_count: the number of wait &drm_nouveau_syncs + */ + __u32 wait_count; + /** + * @sig_count: the number of &drm_nouveau_syncs to signal when finished + */ + __u32 sig_count; + /** + * @wait_ptr: pointer to &drm_nouveau_syncs to wait for + */ + __u64 wait_ptr; + /** + * @sig_ptr: pointer to &drm_nouveau_syncs to signal when finished + */ + __u64 sig_ptr; + /** + * @push_ptr: pointer to &drm_nouveau_exec_push ops + */ + __u64 push_ptr; +}; + +#define DRM_NOUVEAU_GETPARAM 0x00 +#define DRM_NOUVEAU_SETPARAM 0x01 /* deprecated */ +#define DRM_NOUVEAU_CHANNEL_ALLOC 0x02 +#define DRM_NOUVEAU_CHANNEL_FREE 0x03 +#define DRM_NOUVEAU_GROBJ_ALLOC 0x04 /* deprecated */ +#define DRM_NOUVEAU_NOTIFIEROBJ_ALLOC 0x05 /* deprecated */ +#define DRM_NOUVEAU_GPUOBJ_FREE 0x06 /* deprecated */ +#define DRM_NOUVEAU_NVIF 0x07 +#define DRM_NOUVEAU_SVM_INIT 0x08 +#define DRM_NOUVEAU_SVM_BIND 0x09 +#define DRM_NOUVEAU_VM_INIT 0x10 +#define DRM_NOUVEAU_VM_BIND 0x11 +#define DRM_NOUVEAU_EXEC 0x12 +#define DRM_NOUVEAU_GEM_NEW 0x40 +#define DRM_NOUVEAU_GEM_PUSHBUF 0x41 +#define DRM_NOUVEAU_GEM_CPU_PREP 0x42 +#define DRM_NOUVEAU_GEM_CPU_FINI 0x43 +#define DRM_NOUVEAU_GEM_INFO 0x44 + +struct drm_nouveau_svm_init { + __u64 unmanaged_addr; + __u64 unmanaged_size; +}; + +struct drm_nouveau_svm_bind { + __u64 header; + __u64 va_start; + __u64 va_end; + __u64 npages; + __u64 stride; + __u64 result; + __u64 reserved0; + __u64 reserved1; +}; + +#define NOUVEAU_SVM_BIND_COMMAND_SHIFT 0 +#define NOUVEAU_SVM_BIND_COMMAND_BITS 8 +#define NOUVEAU_SVM_BIND_COMMAND_MASK ((1 << 8) - 1) +#define NOUVEAU_SVM_BIND_PRIORITY_SHIFT 8 +#define NOUVEAU_SVM_BIND_PRIORITY_BITS 8 +#define NOUVEAU_SVM_BIND_PRIORITY_MASK ((1 << 8) - 1) +#define NOUVEAU_SVM_BIND_TARGET_SHIFT 16 +#define NOUVEAU_SVM_BIND_TARGET_BITS 32 +#define NOUVEAU_SVM_BIND_TARGET_MASK 0xffffffff + +/* + * Below is use to validate ioctl argument, userspace can also use it to make + * sure that no bit are set beyond known fields for a given kernel version. + */ +#define NOUVEAU_SVM_BIND_VALID_BITS 48 +#define NOUVEAU_SVM_BIND_VALID_MASK ((1ULL << NOUVEAU_SVM_BIND_VALID_BITS) - 1) + +/* + * NOUVEAU_BIND_COMMAND__MIGRATE: synchronous migrate to target memory. + * result: number of page successfuly migrate to the target memory. + */ +#define NOUVEAU_SVM_BIND_COMMAND__MIGRATE 0 + +/* + * NOUVEAU_SVM_BIND_HEADER_TARGET__GPU_VRAM: target the GPU VRAM memory. + */ +#define NOUVEAU_SVM_BIND_TARGET__GPU_VRAM (1UL << 31) + +#define DRM_IOCTL_NOUVEAU_GETPARAM \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_GETPARAM, \ + struct drm_nouveau_getparam) +#define DRM_IOCTL_NOUVEAU_CHANNEL_ALLOC \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_CHANNEL_ALLOC, \ + struct drm_nouveau_channel_alloc) +#define DRM_IOCTL_NOUVEAU_CHANNEL_FREE \ + DRM_IOW(DRM_COMMAND_BASE + DRM_NOUVEAU_CHANNEL_FREE, \ + struct drm_nouveau_channel_free) + +#define DRM_IOCTL_NOUVEAU_SVM_INIT \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_SVM_INIT, \ + struct drm_nouveau_svm_init) +#define DRM_IOCTL_NOUVEAU_SVM_BIND \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_SVM_BIND, \ + struct drm_nouveau_svm_bind) + +#define DRM_IOCTL_NOUVEAU_GEM_NEW \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_GEM_NEW, \ + struct drm_nouveau_gem_new) +#define DRM_IOCTL_NOUVEAU_GEM_PUSHBUF \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_GEM_PUSHBUF, \ + struct drm_nouveau_gem_pushbuf) +#define DRM_IOCTL_NOUVEAU_GEM_CPU_PREP \ + DRM_IOW(DRM_COMMAND_BASE + DRM_NOUVEAU_GEM_CPU_PREP, \ + struct drm_nouveau_gem_cpu_prep) +#define DRM_IOCTL_NOUVEAU_GEM_CPU_FINI \ + DRM_IOW(DRM_COMMAND_BASE + DRM_NOUVEAU_GEM_CPU_FINI, \ + struct drm_nouveau_gem_cpu_fini) +#define DRM_IOCTL_NOUVEAU_GEM_INFO \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_GEM_INFO, \ + struct drm_nouveau_gem_info) + +#define DRM_IOCTL_NOUVEAU_VM_INIT \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_VM_INIT, \ + struct drm_nouveau_vm_init) +#define DRM_IOCTL_NOUVEAU_VM_BIND \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_VM_BIND, \ + struct drm_nouveau_vm_bind) +#define DRM_IOCTL_NOUVEAU_EXEC \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_EXEC, struct drm_nouveau_exec) +#if defined(__cplusplus) +} +#endif + +#endif /* __NOUVEAU_DRM_H__ */ diff --git a/include/arch/x86_64/drm/nova_drm.h b/include/arch/x86_64/drm/nova_drm.h new file mode 100644 index 00000000..5725b84e --- /dev/null +++ b/include/arch/x86_64/drm/nova_drm.h @@ -0,0 +1,102 @@ +/* SPDX-License-Identifier: MIT */ + +#ifndef __NOVA_DRM_H__ +#define __NOVA_DRM_H__ + +#include "drm.h" + +/* DISCLAIMER: Do not use, this is not a stable uAPI. + * + * This uAPI serves only testing purposes as long as this driver is still in + * development. It is required to implement and test infrastructure which is + * upstreamed in the context of this driver. See also [1]. + * + * [1] https://lore.kernel.org/dri-devel/Zfsj0_tb-0-tNrJy@cassiopeiae/T/#u + */ + +#if defined(__cplusplus) +extern "C" { +#endif + +/* + * NOVA_GETPARAM_VRAM_BAR_SIZE + * + * Query the VRAM BAR size in bytes. + */ +#define NOVA_GETPARAM_VRAM_BAR_SIZE 0x1 + +/** + * struct drm_nova_getparam - query GPU and driver metadata + */ +struct drm_nova_getparam { + /** + * @param: The identifier of the parameter to query. + */ + __u64 param; + + /** + * @value: The value for the specified parameter. + */ + __u64 value; +}; + +/** + * struct drm_nova_gem_create - create a new DRM GEM object + */ +struct drm_nova_gem_create { + /** + * @handle: The handle of the new DRM GEM object. + */ + __u32 handle; + + /** + * @pad: 32 bit padding, should be 0. + */ + __u32 pad; + + /** + * @size: The size of the new DRM GEM object. + */ + __u64 size; +}; + +/** + * struct drm_nova_gem_info - query DRM GEM object metadata + */ +struct drm_nova_gem_info { + /** + * @handle: The handle of the DRM GEM object to query. + */ + __u32 handle; + + /** + * @pad: 32 bit padding, should be 0. + */ + __u32 pad; + + /** + * @size: The size of the DRM GEM obejct. + */ + __u64 size; +}; + +#define DRM_NOVA_GETPARAM 0x00 +#define DRM_NOVA_GEM_CREATE 0x01 +#define DRM_NOVA_GEM_INFO 0x02 + +/* Note: this is an enum so that it can be resolved by Rust bindgen. */ +enum { + DRM_IOCTL_NOVA_GETPARAM = DRM_IOWR(DRM_COMMAND_BASE + DRM_NOVA_GETPARAM, + struct drm_nova_getparam), + DRM_IOCTL_NOVA_GEM_CREATE = + DRM_IOWR(DRM_COMMAND_BASE + DRM_NOVA_GEM_CREATE, + struct drm_nova_gem_create), + DRM_IOCTL_NOVA_GEM_INFO = DRM_IOWR(DRM_COMMAND_BASE + DRM_NOVA_GEM_INFO, + struct drm_nova_gem_info), +}; + +#if defined(__cplusplus) +} +#endif + +#endif /* __NOVA_DRM_H__ */ diff --git a/include/arch/x86_64/drm/omap_drm.h b/include/arch/x86_64/drm/omap_drm.h new file mode 100644 index 00000000..54f01ad1 --- /dev/null +++ b/include/arch/x86_64/drm/omap_drm.h @@ -0,0 +1,135 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * include/uapi/drm/omap_drm.h + * + * Copyright (C) 2011 Texas Instruments + * Author: Rob Clark <rob@ti.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef __OMAP_DRM_H__ +#define __OMAP_DRM_H__ + +#include "drm.h" + +#if defined(__cplusplus) +extern "C" { +#endif + +/* Please note that modifications to all structs defined here are + * subject to backwards-compatibility constraints. + */ + +#define OMAP_PARAM_CHIPSET_ID 1 /* ie. 0x3430, 0x4430, etc */ + +struct drm_omap_param { + __u64 param; /* in */ + __u64 value; /* in (set_param), out (get_param) */ +}; + +/* Scanout buffer, consumable by DSS */ +#define OMAP_BO_SCANOUT 0x00000001 + +/* Buffer CPU caching mode: cached, write-combining or uncached. */ +#define OMAP_BO_CACHED 0x00000000 +#define OMAP_BO_WC 0x00000002 +#define OMAP_BO_UNCACHED 0x00000004 +#define OMAP_BO_CACHE_MASK 0x00000006 + +/* Use TILER for the buffer. The TILER container unit can be 8, 16 or 32 bits. + */ +#define OMAP_BO_TILED_8 0x00000100 +#define OMAP_BO_TILED_16 0x00000200 +#define OMAP_BO_TILED_32 0x00000300 +#define OMAP_BO_TILED_MASK 0x00000f00 + +union omap_gem_size { + __u32 bytes; /* (for non-tiled formats) */ + struct { + __u16 width; + __u16 height; + } tiled; /* (for tiled formats) */ +}; + +struct drm_omap_gem_new { + union omap_gem_size size; /* in */ + __u32 flags; /* in */ + __u32 handle; /* out */ + __u32 __pad; +}; + +/* mask of operations: */ +enum omap_gem_op { + OMAP_GEM_READ = 0x01, + OMAP_GEM_WRITE = 0x02, +}; + +struct drm_omap_gem_cpu_prep { + __u32 handle; /* buffer handle (in) */ + __u32 op; /* mask of omap_gem_op (in) */ +}; + +struct drm_omap_gem_cpu_fini { + __u32 handle; /* buffer handle (in) */ + __u32 op; /* mask of omap_gem_op (in) */ + /* TODO maybe here we pass down info about what regions are touched + * by sw so we can be clever about cache ops? For now a placeholder, + * set to zero and we just do full buffer flush.. + */ + __u32 nregions; + __u32 __pad; +}; + +struct drm_omap_gem_info { + __u32 handle; /* buffer handle (in) */ + __u32 pad; + __u64 offset; /* mmap offset (out) */ + /* note: in case of tiled buffers, the user virtual size can be + * different from the physical size (ie. how many pages are needed + * to back the object) which is returned in DRM_IOCTL_GEM_OPEN.. + * This size here is the one that should be used if you want to + * mmap() the buffer: + */ + __u32 size; /* virtual size for mmap'ing (out) */ + __u32 __pad; +}; + +#define DRM_OMAP_GET_PARAM 0x00 +#define DRM_OMAP_SET_PARAM 0x01 +#define DRM_OMAP_GEM_NEW 0x03 +#define DRM_OMAP_GEM_CPU_PREP 0x04 /* Deprecated, to be removed */ +#define DRM_OMAP_GEM_CPU_FINI 0x05 /* Deprecated, to be removed */ +#define DRM_OMAP_GEM_INFO 0x06 +#define DRM_OMAP_NUM_IOCTLS 0x07 + +#define DRM_IOCTL_OMAP_GET_PARAM \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_OMAP_GET_PARAM, struct drm_omap_param) +#define DRM_IOCTL_OMAP_SET_PARAM \ + DRM_IOW(DRM_COMMAND_BASE + DRM_OMAP_SET_PARAM, struct drm_omap_param) +#define DRM_IOCTL_OMAP_GEM_NEW \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_OMAP_GEM_NEW, struct drm_omap_gem_new) +#define DRM_IOCTL_OMAP_GEM_CPU_PREP \ + DRM_IOW(DRM_COMMAND_BASE + DRM_OMAP_GEM_CPU_PREP, \ + struct drm_omap_gem_cpu_prep) +#define DRM_IOCTL_OMAP_GEM_CPU_FINI \ + DRM_IOW(DRM_COMMAND_BASE + DRM_OMAP_GEM_CPU_FINI, \ + struct drm_omap_gem_cpu_fini) +#define DRM_IOCTL_OMAP_GEM_INFO \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_OMAP_GEM_INFO, struct drm_omap_gem_info) + +#if defined(__cplusplus) +} +#endif + +#endif /* __OMAP_DRM_H__ */ diff --git a/include/arch/x86_64/drm/panfrost_drm.h b/include/arch/x86_64/drm/panfrost_drm.h new file mode 100644 index 00000000..2ce0e858 --- /dev/null +++ b/include/arch/x86_64/drm/panfrost_drm.h @@ -0,0 +1,327 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2014-2018 Broadcom + * Copyright © 2019 Collabora ltd. + */ +#ifndef _PANFROST_DRM_H_ +#define _PANFROST_DRM_H_ + +#include "drm.h" + +#if defined(__cplusplus) +extern "C" { +#endif + +#define DRM_PANFROST_SUBMIT 0x00 +#define DRM_PANFROST_WAIT_BO 0x01 +#define DRM_PANFROST_CREATE_BO 0x02 +#define DRM_PANFROST_MMAP_BO 0x03 +#define DRM_PANFROST_GET_PARAM 0x04 +#define DRM_PANFROST_GET_BO_OFFSET 0x05 +#define DRM_PANFROST_PERFCNT_ENABLE 0x06 +#define DRM_PANFROST_PERFCNT_DUMP 0x07 +#define DRM_PANFROST_MADVISE 0x08 +#define DRM_PANFROST_SET_LABEL_BO 0x09 + +#define DRM_IOCTL_PANFROST_SUBMIT \ + DRM_IOW(DRM_COMMAND_BASE + DRM_PANFROST_SUBMIT, \ + struct drm_panfrost_submit) +#define DRM_IOCTL_PANFROST_WAIT_BO \ + DRM_IOW(DRM_COMMAND_BASE + DRM_PANFROST_WAIT_BO, \ + struct drm_panfrost_wait_bo) +#define DRM_IOCTL_PANFROST_CREATE_BO \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_CREATE_BO, \ + struct drm_panfrost_create_bo) +#define DRM_IOCTL_PANFROST_MMAP_BO \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_MMAP_BO, \ + struct drm_panfrost_mmap_bo) +#define DRM_IOCTL_PANFROST_GET_PARAM \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_GET_PARAM, \ + struct drm_panfrost_get_param) +#define DRM_IOCTL_PANFROST_GET_BO_OFFSET \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_GET_BO_OFFSET, \ + struct drm_panfrost_get_bo_offset) +#define DRM_IOCTL_PANFROST_MADVISE \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_MADVISE, \ + struct drm_panfrost_madvise) +#define DRM_IOCTL_PANFROST_SET_LABEL_BO \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_SET_LABEL_BO, \ + struct drm_panfrost_set_label_bo) + +/* + * Unstable ioctl(s): only exposed when the unsafe unstable_ioctls module + * param is set to true. + * All these ioctl(s) are subject to deprecation, so please don't rely on + * them for anything but debugging purpose. + */ +#define DRM_IOCTL_PANFROST_PERFCNT_ENABLE \ + DRM_IOW(DRM_COMMAND_BASE + DRM_PANFROST_PERFCNT_ENABLE, \ + struct drm_panfrost_perfcnt_enable) +#define DRM_IOCTL_PANFROST_PERFCNT_DUMP \ + DRM_IOW(DRM_COMMAND_BASE + DRM_PANFROST_PERFCNT_DUMP, \ + struct drm_panfrost_perfcnt_dump) + +#define PANFROST_JD_REQ_FS (1 << 0) +#define PANFROST_JD_REQ_CYCLE_COUNT (1 << 1) +/** + * struct drm_panfrost_submit - ioctl argument for submitting commands to the 3D + * engine. + * + * This asks the kernel to have the GPU execute a render command list. + */ +struct drm_panfrost_submit { + /** Address to GPU mapping of job descriptor */ + __u64 jc; + + /** An optional array of sync objects to wait on before starting this + * job. */ + __u64 in_syncs; + + /** Number of sync objects to wait on before starting this job. */ + __u32 in_sync_count; + + /** An optional sync object to place the completion fence in. */ + __u32 out_sync; + + /** Pointer to a u32 array of the BOs that are referenced by the job. */ + __u64 bo_handles; + + /** Number of BO handles passed in (size is that times 4). */ + __u32 bo_handle_count; + + /** A combination of PANFROST_JD_REQ_* */ + __u32 requirements; +}; + +/** + * struct drm_panfrost_wait_bo - ioctl argument for waiting for + * completion of the last DRM_PANFROST_SUBMIT on a BO. + * + * This is useful for cases where multiple processes might be + * rendering to a BO and you want to wait for all rendering to be + * completed. + */ +struct drm_panfrost_wait_bo { + __u32 handle; + __u32 pad; + __s64 timeout_ns; /* absolute */ +}; + +/* Valid flags to pass to drm_panfrost_create_bo */ +#define PANFROST_BO_NOEXEC 1 +#define PANFROST_BO_HEAP 2 + +/** + * struct drm_panfrost_create_bo - ioctl argument for creating Panfrost BOs. + * + * The flags argument is a bit mask of PANFROST_BO_* flags. + */ +struct drm_panfrost_create_bo { + __u32 size; + __u32 flags; + /** Returned GEM handle for the BO. */ + __u32 handle; + /* Pad, must be zero-filled. */ + __u32 pad; + /** + * Returned offset for the BO in the GPU address space. This offset + * is private to the DRM fd and is valid for the lifetime of the GEM + * handle. + * + * This offset value will always be nonzero, since various HW + * units treat 0 specially. + */ + __u64 offset; +}; + +/** + * struct drm_panfrost_mmap_bo - ioctl argument for mapping Panfrost BOs. + * + * This doesn't actually perform an mmap. Instead, it returns the + * offset you need to use in an mmap on the DRM device node. This + * means that tools like valgrind end up knowing about the mapped + * memory. + * + * There are currently no values for the flags argument, but it may be + * used in a future extension. + */ +struct drm_panfrost_mmap_bo { + /** Handle for the object being mapped. */ + __u32 handle; + __u32 flags; + /** offset into the drm node to use for subsequent mmap call. */ + __u64 offset; +}; + +enum drm_panfrost_param { + DRM_PANFROST_PARAM_GPU_PROD_ID, + DRM_PANFROST_PARAM_GPU_REVISION, + DRM_PANFROST_PARAM_SHADER_PRESENT, + DRM_PANFROST_PARAM_TILER_PRESENT, + DRM_PANFROST_PARAM_L2_PRESENT, + DRM_PANFROST_PARAM_STACK_PRESENT, + DRM_PANFROST_PARAM_AS_PRESENT, + DRM_PANFROST_PARAM_JS_PRESENT, + DRM_PANFROST_PARAM_L2_FEATURES, + DRM_PANFROST_PARAM_CORE_FEATURES, + DRM_PANFROST_PARAM_TILER_FEATURES, + DRM_PANFROST_PARAM_MEM_FEATURES, + DRM_PANFROST_PARAM_MMU_FEATURES, + DRM_PANFROST_PARAM_THREAD_FEATURES, + DRM_PANFROST_PARAM_MAX_THREADS, + DRM_PANFROST_PARAM_THREAD_MAX_WORKGROUP_SZ, + DRM_PANFROST_PARAM_THREAD_MAX_BARRIER_SZ, + DRM_PANFROST_PARAM_COHERENCY_FEATURES, + DRM_PANFROST_PARAM_TEXTURE_FEATURES0, + DRM_PANFROST_PARAM_TEXTURE_FEATURES1, + DRM_PANFROST_PARAM_TEXTURE_FEATURES2, + DRM_PANFROST_PARAM_TEXTURE_FEATURES3, + DRM_PANFROST_PARAM_JS_FEATURES0, + DRM_PANFROST_PARAM_JS_FEATURES1, + DRM_PANFROST_PARAM_JS_FEATURES2, + DRM_PANFROST_PARAM_JS_FEATURES3, + DRM_PANFROST_PARAM_JS_FEATURES4, + DRM_PANFROST_PARAM_JS_FEATURES5, + DRM_PANFROST_PARAM_JS_FEATURES6, + DRM_PANFROST_PARAM_JS_FEATURES7, + DRM_PANFROST_PARAM_JS_FEATURES8, + DRM_PANFROST_PARAM_JS_FEATURES9, + DRM_PANFROST_PARAM_JS_FEATURES10, + DRM_PANFROST_PARAM_JS_FEATURES11, + DRM_PANFROST_PARAM_JS_FEATURES12, + DRM_PANFROST_PARAM_JS_FEATURES13, + DRM_PANFROST_PARAM_JS_FEATURES14, + DRM_PANFROST_PARAM_JS_FEATURES15, + DRM_PANFROST_PARAM_NR_CORE_GROUPS, + DRM_PANFROST_PARAM_THREAD_TLS_ALLOC, + DRM_PANFROST_PARAM_AFBC_FEATURES, + DRM_PANFROST_PARAM_SYSTEM_TIMESTAMP, + DRM_PANFROST_PARAM_SYSTEM_TIMESTAMP_FREQUENCY, +}; + +struct drm_panfrost_get_param { + __u32 param; + __u32 pad; + __u64 value; +}; + +/** + * Returns the offset for the BO in the GPU address space for this DRM fd. + * This is the same value returned by drm_panfrost_create_bo, if that was called + * from this DRM fd. + */ +struct drm_panfrost_get_bo_offset { + __u32 handle; + __u32 pad; + __u64 offset; +}; + +struct drm_panfrost_perfcnt_enable { + __u32 enable; + /* + * On bifrost we have 2 sets of counters, this parameter defines the + * one to track. + */ + __u32 counterset; +}; + +struct drm_panfrost_perfcnt_dump { + __u64 buf_ptr; +}; + +/* madvise provides a way to tell the kernel in case a buffers contents + * can be discarded under memory pressure, which is useful for userspace + * bo cache where we want to optimistically hold on to buffer allocate + * and potential mmap, but allow the pages to be discarded under memory + * pressure. + * + * Typical usage would involve madvise(DONTNEED) when buffer enters BO + * cache, and madvise(WILLNEED) if trying to recycle buffer from BO cache. + * In the WILLNEED case, 'retained' indicates to userspace whether the + * backing pages still exist. + */ +#define PANFROST_MADV_WILLNEED \ + 0 /* backing pages are needed, status returned in 'retained' */ +#define PANFROST_MADV_DONTNEED 1 /* backing pages not needed */ + +struct drm_panfrost_madvise { + __u32 handle; /* in, GEM handle */ + __u32 madv; /* in, PANFROST_MADV_x */ + __u32 retained; /* out, whether backing store still exists */ +}; + +/** + * struct drm_panfrost_set_label_bo - ioctl argument for labelling Panfrost BOs. + */ +struct drm_panfrost_set_label_bo { + /** @handle: Handle of the buffer object to label. */ + __u32 handle; + + /** @pad: MBZ. */ + __u32 pad; + + /** + * @label: User pointer to a NUL-terminated string + * + * Length cannot be greater than 4096. + * NULL is permitted and means clear the label. + */ + __u64 label; +}; + +/* Definitions for coredump decoding in user space */ +#define PANFROSTDUMP_MAJOR 1 +#define PANFROSTDUMP_MINOR 0 + +#define PANFROSTDUMP_MAGIC 0x464E4150 /* PANF */ + +#define PANFROSTDUMP_BUF_REG 0 +#define PANFROSTDUMP_BUF_BOMAP (PANFROSTDUMP_BUF_REG + 1) +#define PANFROSTDUMP_BUF_BO (PANFROSTDUMP_BUF_BOMAP + 1) +#define PANFROSTDUMP_BUF_TRAILER (PANFROSTDUMP_BUF_BO + 1) + +/* + * This structure is the native endianness of the dumping machine, tools can + * detect the endianness by looking at the value in 'magic'. + */ +struct panfrost_dump_object_header { + __u32 magic; + __u32 type; + __u32 file_size; + __u32 file_offset; + + union { + struct { + __u64 jc; + __u32 gpu_id; + __u32 major; + __u32 minor; + __u64 nbos; + } reghdr; + + struct { + __u32 valid; + __u64 iova; + __u32 data[2]; + } bomap; + + /* + * Force same size in case we want to expand the header + * with new fields and also keep it 512-byte aligned + */ + + __u32 sizer[496]; + }; +}; + +/* Registers object, an array of these */ +struct panfrost_dump_registers { + __u32 reg; + __u32 value; +}; + +#if defined(__cplusplus) +} +#endif + +#endif /* _PANFROST_DRM_H_ */ diff --git a/include/arch/x86_64/drm/panthor_drm.h b/include/arch/x86_64/drm/panthor_drm.h new file mode 100644 index 00000000..cd9b3ad8 --- /dev/null +++ b/include/arch/x86_64/drm/panthor_drm.h @@ -0,0 +1,1140 @@ +/* SPDX-License-Identifier: MIT */ +/* Copyright (C) 2023 Collabora ltd. */ +#ifndef _PANTHOR_DRM_H_ +#define _PANTHOR_DRM_H_ + +#include "drm.h" + +#if defined(__cplusplus) +extern "C" { +#endif + +/** + * DOC: Introduction + * + * This documentation describes the Panthor IOCTLs. + * + * Just a few generic rules about the data passed to the Panthor IOCTLs: + * + * - Structures must be aligned on 64-bit/8-byte. If the object is not + * naturally aligned, a padding field must be added. + * - Fields must be explicitly aligned to their natural type alignment with + * pad[0..N] fields. + * - All padding fields will be checked by the driver to make sure they are + * zeroed. + * - Flags can be added, but not removed/replaced. + * - New fields can be added to the main structures (the structures + * directly passed to the ioctl). Those fields can be added at the end of + * the structure, or replace existing padding fields. Any new field being + * added must preserve the behavior that existed before those fields were + * added when a value of zero is passed. + * - New fields can be added to indirect objects (objects pointed by the + * main structure), iff those objects are passed a size to reflect the + * size known by the userspace driver (see drm_panthor_obj_array::stride + * or drm_panthor_dev_query::size). + * - If the kernel driver is too old to know some fields, those will be + * ignored if zero, and otherwise rejected (and so will be zero on output). + * - If userspace is too old to know some fields, those will be zeroed + * (input) before the structure is parsed by the kernel driver. + * - Each new flag/field addition must come with a driver version update so + * the userspace driver doesn't have to trial and error to know which + * flags are supported. + * - Structures should not contain unions, as this would defeat the + * extensibility of such structures. + * - IOCTLs can't be removed or replaced. New IOCTL IDs should be placed + * at the end of the drm_panthor_ioctl_id enum. + */ + +/** + * DOC: MMIO regions exposed to userspace. + * + * .. c:macro:: DRM_PANTHOR_USER_MMIO_OFFSET + * + * File offset for all MMIO regions being exposed to userspace. Don't use + * this value directly, use DRM_PANTHOR_USER_<name>_OFFSET values instead. + * pgoffset passed to mmap2() is an unsigned long, which forces us to use a + * different offset on 32-bit and 64-bit systems. + * + * .. c:macro:: DRM_PANTHOR_USER_FLUSH_ID_MMIO_OFFSET + * + * File offset for the LATEST_FLUSH_ID register. The Userspace driver controls + * GPU cache flushing through CS instructions, but the flush reduction + * mechanism requires a flush_id. This flush_id could be queried with an + * ioctl, but Arm provides a well-isolated register page containing only this + * read-only register, so let's expose this page through a static mmap offset + * and allow direct mapping of this MMIO region so we can avoid the + * user <-> kernel round-trip. + */ +#define DRM_PANTHOR_USER_MMIO_OFFSET_32BIT (1ull << 43) +#define DRM_PANTHOR_USER_MMIO_OFFSET_64BIT (1ull << 56) +#define DRM_PANTHOR_USER_MMIO_OFFSET \ + (sizeof(unsigned long) < 8 ? DRM_PANTHOR_USER_MMIO_OFFSET_32BIT : \ + DRM_PANTHOR_USER_MMIO_OFFSET_64BIT) +#define DRM_PANTHOR_USER_FLUSH_ID_MMIO_OFFSET (DRM_PANTHOR_USER_MMIO_OFFSET | 0) + +/** + * DOC: IOCTL IDs + * + * enum drm_panthor_ioctl_id - IOCTL IDs + * + * Place new ioctls at the end, don't re-order, don't replace or remove entries. + * + * These IDs are not meant to be used directly. Use the DRM_IOCTL_PANTHOR_xxx + * definitions instead. + */ +enum drm_panthor_ioctl_id { + /** @DRM_PANTHOR_DEV_QUERY: Query device information. */ + DRM_PANTHOR_DEV_QUERY = 0, + + /** @DRM_PANTHOR_VM_CREATE: Create a VM. */ + DRM_PANTHOR_VM_CREATE, + + /** @DRM_PANTHOR_VM_DESTROY: Destroy a VM. */ + DRM_PANTHOR_VM_DESTROY, + + /** @DRM_PANTHOR_VM_BIND: Bind/unbind memory to a VM. */ + DRM_PANTHOR_VM_BIND, + + /** @DRM_PANTHOR_VM_GET_STATE: Get VM state. */ + DRM_PANTHOR_VM_GET_STATE, + + /** @DRM_PANTHOR_BO_CREATE: Create a buffer object. */ + DRM_PANTHOR_BO_CREATE, + + /** + * @DRM_PANTHOR_BO_MMAP_OFFSET: Get the file offset to pass to + * mmap to map a GEM object. + */ + DRM_PANTHOR_BO_MMAP_OFFSET, + + /** @DRM_PANTHOR_GROUP_CREATE: Create a scheduling group. */ + DRM_PANTHOR_GROUP_CREATE, + + /** @DRM_PANTHOR_GROUP_DESTROY: Destroy a scheduling group. */ + DRM_PANTHOR_GROUP_DESTROY, + + /** + * @DRM_PANTHOR_GROUP_SUBMIT: Submit jobs to queues belonging + * to a specific scheduling group. + */ + DRM_PANTHOR_GROUP_SUBMIT, + + /** @DRM_PANTHOR_GROUP_GET_STATE: Get the state of a scheduling group. + */ + DRM_PANTHOR_GROUP_GET_STATE, + + /** @DRM_PANTHOR_TILER_HEAP_CREATE: Create a tiler heap. */ + DRM_PANTHOR_TILER_HEAP_CREATE, + + /** @DRM_PANTHOR_TILER_HEAP_DESTROY: Destroy a tiler heap. */ + DRM_PANTHOR_TILER_HEAP_DESTROY, + + /** @DRM_PANTHOR_BO_SET_LABEL: Label a BO. */ + DRM_PANTHOR_BO_SET_LABEL, + + /** + * @DRM_PANTHOR_SET_USER_MMIO_OFFSET: Set the offset to use as the user + * MMIO offset. + * + * The default behavior is to pick the MMIO offset based on the size of + * the pgoff_t type seen by the process that manipulates the FD, such + * that a 32-bit process can always map the user MMIO ranges. But this + * approach doesn't work well for emulators like FEX, where the emulator + * is an 64-bit binary which might be executing 32-bit code. In that + * case, the kernel thinks it's the 64-bit process and assumes + * DRM_PANTHOR_USER_MMIO_OFFSET_64BIT is in use, but the UMD library + * expects DRM_PANTHOR_USER_MMIO_OFFSET_32BIT, because it can't mmap() + * anything above the pgoff_t size. + */ + DRM_PANTHOR_SET_USER_MMIO_OFFSET, +}; + +/** + * DOC: IOCTL arguments + */ + +/** + * struct drm_panthor_obj_array - Object array. + * + * This object is used to pass an array of objects whose size is subject to + * changes in future versions of the driver. In order to support this + * mutability, we pass a stride describing the size of the object as known by + * userspace. + * + * You shouldn't fill drm_panthor_obj_array fields directly. You should instead + * use the DRM_PANTHOR_OBJ_ARRAY() macro that takes care of initializing the + * stride to the object size. + */ +struct drm_panthor_obj_array { + /** @stride: Stride of object struct. Used for versioning. */ + __u32 stride; + + /** @count: Number of objects in the array. */ + __u32 count; + + /** @array: User pointer to an array of objects. */ + __u64 array; +}; + +/** + * DRM_PANTHOR_OBJ_ARRAY() - Initialize a drm_panthor_obj_array field. + * @cnt: Number of elements in the array. + * @ptr: Pointer to the array to pass to the kernel. + * + * Macro initializing a drm_panthor_obj_array based on the object size as known + * by userspace. + */ +#define DRM_PANTHOR_OBJ_ARRAY(cnt, ptr) \ + { .stride = sizeof((ptr)[0]), \ + .count = (cnt), \ + .array = (__u64)(uintptr_t)(ptr) } + +/** + * enum drm_panthor_sync_op_flags - Synchronization operation flags. + */ +enum drm_panthor_sync_op_flags { + /** @DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_MASK: Synchronization handle type + mask. */ + DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_MASK = 0xff, + + /** @DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_SYNCOBJ: Synchronization object + type. */ + DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_SYNCOBJ = 0, + + /** + * @DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_TIMELINE_SYNCOBJ: Timeline + * synchronization object type. + */ + DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_TIMELINE_SYNCOBJ = 1, + + /** @DRM_PANTHOR_SYNC_OP_WAIT: Wait operation. */ + DRM_PANTHOR_SYNC_OP_WAIT = 0 << 31, + + /** @DRM_PANTHOR_SYNC_OP_SIGNAL: Signal operation. */ + DRM_PANTHOR_SYNC_OP_SIGNAL = (int)(1u << 31), +}; + +/** + * struct drm_panthor_sync_op - Synchronization operation. + */ +struct drm_panthor_sync_op { + /** @flags: Synchronization operation flags. Combination of + * DRM_PANTHOR_SYNC_OP values. */ + __u32 flags; + + /** @handle: Sync handle. */ + __u32 handle; + + /** + * @timeline_value: MBZ if + * (flags & DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_MASK) != + * DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_TIMELINE_SYNCOBJ. + */ + __u64 timeline_value; +}; + +/** + * enum drm_panthor_dev_query_type - Query type + * + * Place new types at the end, don't re-order, don't remove or replace. + */ +enum drm_panthor_dev_query_type { + /** @DRM_PANTHOR_DEV_QUERY_GPU_INFO: Query GPU information. */ + DRM_PANTHOR_DEV_QUERY_GPU_INFO = 0, + + /** @DRM_PANTHOR_DEV_QUERY_CSIF_INFO: Query command-stream interface + information. */ + DRM_PANTHOR_DEV_QUERY_CSIF_INFO, + + /** @DRM_PANTHOR_DEV_QUERY_TIMESTAMP_INFO: Query timestamp information. + */ + DRM_PANTHOR_DEV_QUERY_TIMESTAMP_INFO, + + /** + * @DRM_PANTHOR_DEV_QUERY_GROUP_PRIORITIES_INFO: Query allowed group + * priorities information. + */ + DRM_PANTHOR_DEV_QUERY_GROUP_PRIORITIES_INFO, +}; + +/** + * struct drm_panthor_gpu_info - GPU information + * + * Structure grouping all queryable information relating to the GPU. + */ +struct drm_panthor_gpu_info { + /** @gpu_id : GPU ID. */ + __u32 gpu_id; +#define DRM_PANTHOR_ARCH_MAJOR(x) ((x) >> 28) +#define DRM_PANTHOR_ARCH_MINOR(x) (((x) >> 24) & 0xf) +#define DRM_PANTHOR_ARCH_REV(x) (((x) >> 20) & 0xf) +#define DRM_PANTHOR_PRODUCT_MAJOR(x) (((x) >> 16) & 0xf) +#define DRM_PANTHOR_VERSION_MAJOR(x) (((x) >> 12) & 0xf) +#define DRM_PANTHOR_VERSION_MINOR(x) (((x) >> 4) & 0xff) +#define DRM_PANTHOR_VERSION_STATUS(x) ((x) & 0xf) + + /** @gpu_rev: GPU revision. */ + __u32 gpu_rev; + + /** @csf_id: Command stream frontend ID. */ + __u32 csf_id; +#define DRM_PANTHOR_CSHW_MAJOR(x) (((x) >> 26) & 0x3f) +#define DRM_PANTHOR_CSHW_MINOR(x) (((x) >> 20) & 0x3f) +#define DRM_PANTHOR_CSHW_REV(x) (((x) >> 16) & 0xf) +#define DRM_PANTHOR_MCU_MAJOR(x) (((x) >> 10) & 0x3f) +#define DRM_PANTHOR_MCU_MINOR(x) (((x) >> 4) & 0x3f) +#define DRM_PANTHOR_MCU_REV(x) ((x) & 0xf) + + /** @l2_features: L2-cache features. */ + __u32 l2_features; + + /** @tiler_features: Tiler features. */ + __u32 tiler_features; + + /** @mem_features: Memory features. */ + __u32 mem_features; + + /** @mmu_features: MMU features. */ + __u32 mmu_features; +#define DRM_PANTHOR_MMU_VA_BITS(x) ((x) & 0xff) + + /** @thread_features: Thread features. */ + __u32 thread_features; + + /** @max_threads: Maximum number of threads. */ + __u32 max_threads; + + /** @thread_max_workgroup_size: Maximum workgroup size. */ + __u32 thread_max_workgroup_size; + + /** + * @thread_max_barrier_size: Maximum number of threads that can wait + * simultaneously on a barrier. + */ + __u32 thread_max_barrier_size; + + /** @coherency_features: Coherency features. */ + __u32 coherency_features; + + /** @texture_features: Texture features. */ + __u32 texture_features[4]; + + /** @as_present: Bitmask encoding the number of address-space exposed by + * the MMU. */ + __u32 as_present; + + /** @pad0: MBZ. */ + __u32 pad0; + + /** @shader_present: Bitmask encoding the shader cores exposed by the + * GPU. */ + __u64 shader_present; + + /** @l2_present: Bitmask encoding the L2 caches exposed by the GPU. */ + __u64 l2_present; + + /** @tiler_present: Bitmask encoding the tiler units exposed by the GPU. + */ + __u64 tiler_present; + + /** @core_features: Used to discriminate core variants when they exist. + */ + __u32 core_features; + + /** @pad: MBZ. */ + __u32 pad; +}; + +/** + * struct drm_panthor_csif_info - Command stream interface information + * + * Structure grouping all queryable information relating to the command stream + * interface. + */ +struct drm_panthor_csif_info { + /** @csg_slot_count: Number of command stream group slots exposed by the + * firmware. */ + __u32 csg_slot_count; + + /** @cs_slot_count: Number of command stream slots per group. */ + __u32 cs_slot_count; + + /** @cs_reg_count: Number of command stream registers. */ + __u32 cs_reg_count; + + /** @scoreboard_slot_count: Number of scoreboard slots. */ + __u32 scoreboard_slot_count; + + /** + * @unpreserved_cs_reg_count: Number of command stream registers + * reserved by the kernel driver to call a userspace command stream. + * + * All registers can be used by a userspace command stream, but the + * [cs_slot_count - unpreserved_cs_reg_count .. cs_slot_count] registers + * are used by the kernel when DRM_PANTHOR_IOCTL_GROUP_SUBMIT is called. + */ + __u32 unpreserved_cs_reg_count; + + /** + * @pad: Padding field, set to zero. + */ + __u32 pad; +}; + +/** + * struct drm_panthor_timestamp_info - Timestamp information + * + * Structure grouping all queryable information relating to the GPU timestamp. + */ +struct drm_panthor_timestamp_info { + /** + * @timestamp_frequency: The frequency of the timestamp timer or 0 if + * unknown. + */ + __u64 timestamp_frequency; + + /** @current_timestamp: The current timestamp. */ + __u64 current_timestamp; + + /** @timestamp_offset: The offset of the timestamp timer. */ + __u64 timestamp_offset; +}; + +/** + * struct drm_panthor_group_priorities_info - Group priorities information + * + * Structure grouping all queryable information relating to the allowed group + * priorities. + */ +struct drm_panthor_group_priorities_info { + /** + * @allowed_mask: Bitmask of the allowed group priorities. + * + * Each bit represents a variant of the enum drm_panthor_group_priority. + */ + __u8 allowed_mask; + + /** @pad: Padding fields, MBZ. */ + __u8 pad[3]; +}; + +/** + * struct drm_panthor_dev_query - Arguments passed to + * DRM_PANTHOR_IOCTL_DEV_QUERY + */ +struct drm_panthor_dev_query { + /** @type: the query type (see drm_panthor_dev_query_type). */ + __u32 type; + + /** + * @size: size of the type being queried. + * + * If pointer is NULL, size is updated by the driver to provide the + * output structure size. If pointer is not NULL, the driver will + * only copy min(size, actual_structure_size) bytes to the pointer, + * and update the size accordingly. This allows us to extend query + * types without breaking userspace. + */ + __u32 size; + + /** + * @pointer: user pointer to a query type struct. + * + * Pointer can be NULL, in which case, nothing is copied, but the + * actual structure size is returned. If not NULL, it must point to + * a location that's large enough to hold size bytes. + */ + __u64 pointer; +}; + +/** + * struct drm_panthor_vm_create - Arguments passed to + * DRM_PANTHOR_IOCTL_VM_CREATE + */ +struct drm_panthor_vm_create { + /** @flags: VM flags, MBZ. */ + __u32 flags; + + /** @id: Returned VM ID. */ + __u32 id; + + /** + * @user_va_range: Size of the VA space reserved for user objects. + * + * The kernel will pick the remaining space to map kernel-only objects + * to the VM (heap chunks, heap context, ring buffers, kernel + * synchronization objects, + * ...). If the space left for kernel objects is too small, kernel + * object allocation will fail further down the road. One can use + * drm_panthor_gpu_info::mmu_features to extract the total virtual + * address range, and chose a user_va_range that leaves some space to + * the kernel. + * + * If user_va_range is zero, the kernel will pick a sensible value based + * on TASK_SIZE and the virtual range supported by the GPU MMU (the + * kernel/user split should leave enough VA space for userspace + * processes to support SVM, while still allowing the kernel to map some + * amount of kernel objects in the kernel VA range). The value chosen by + * the driver will be returned in + * @user_va_range. + * + * User VA space always starts at 0x0, kernel VA space is always placed + * after the user VA range. + */ + __u64 user_va_range; +}; + +/** + * struct drm_panthor_vm_destroy - Arguments passed to + * DRM_PANTHOR_IOCTL_VM_DESTROY + */ +struct drm_panthor_vm_destroy { + /** @id: ID of the VM to destroy. */ + __u32 id; + + /** @pad: MBZ. */ + __u32 pad; +}; + +/** + * enum drm_panthor_vm_bind_op_flags - VM bind operation flags + */ +enum drm_panthor_vm_bind_op_flags { + /** + * @DRM_PANTHOR_VM_BIND_OP_MAP_READONLY: Map the memory read-only. + * + * Only valid with DRM_PANTHOR_VM_BIND_OP_TYPE_MAP. + */ + DRM_PANTHOR_VM_BIND_OP_MAP_READONLY = 1 << 0, + + /** + * @DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC: Map the memory not-executable. + * + * Only valid with DRM_PANTHOR_VM_BIND_OP_TYPE_MAP. + */ + DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC = 1 << 1, + + /** + * @DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED: Map the memory uncached. + * + * Only valid with DRM_PANTHOR_VM_BIND_OP_TYPE_MAP. + */ + DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED = 1 << 2, + + /** + * @DRM_PANTHOR_VM_BIND_OP_TYPE_MASK: Mask used to determine the type of + * operation. + */ + DRM_PANTHOR_VM_BIND_OP_TYPE_MASK = (int)(0xfu << 28), + + /** @DRM_PANTHOR_VM_BIND_OP_TYPE_MAP: Map operation. */ + DRM_PANTHOR_VM_BIND_OP_TYPE_MAP = 0 << 28, + + /** @DRM_PANTHOR_VM_BIND_OP_TYPE_UNMAP: Unmap operation. */ + DRM_PANTHOR_VM_BIND_OP_TYPE_UNMAP = 1 << 28, + + /** + * @DRM_PANTHOR_VM_BIND_OP_TYPE_SYNC_ONLY: No VM operation. + * + * Just serves as a synchronization point on a VM queue. + * + * Only valid if %DRM_PANTHOR_VM_BIND_ASYNC is set in + * drm_panthor_vm_bind::flags, and drm_panthor_vm_bind_op::syncs + * contains at least one element. + */ + DRM_PANTHOR_VM_BIND_OP_TYPE_SYNC_ONLY = 2 << 28, +}; + +/** + * struct drm_panthor_vm_bind_op - VM bind operation + */ +struct drm_panthor_vm_bind_op { + /** @flags: Combination of drm_panthor_vm_bind_op_flags flags. */ + __u32 flags; + + /** + * @bo_handle: Handle of the buffer object to map. + * MBZ for unmap or sync-only operations. + */ + __u32 bo_handle; + + /** + * @bo_offset: Buffer object offset. + * MBZ for unmap or sync-only operations. + */ + __u64 bo_offset; + + /** + * @va: Virtual address to map/unmap. + * MBZ for sync-only operations. + */ + __u64 va; + + /** + * @size: Size to map/unmap. + * MBZ for sync-only operations. + */ + __u64 size; + + /** + * @syncs: Array of struct drm_panthor_sync_op synchronization + * operations. + * + * This array must be empty if %DRM_PANTHOR_VM_BIND_ASYNC is not set on + * the drm_panthor_vm_bind object containing this VM bind operation. + * + * This array shall not be empty for sync-only operations. + */ + struct drm_panthor_obj_array syncs; +}; + +/** + * enum drm_panthor_vm_bind_flags - VM bind flags + */ +enum drm_panthor_vm_bind_flags { + /** + * @DRM_PANTHOR_VM_BIND_ASYNC: VM bind operations are queued to the VM + * queue instead of being executed synchronously. + */ + DRM_PANTHOR_VM_BIND_ASYNC = 1 << 0, +}; + +/** + * struct drm_panthor_vm_bind - Arguments passed to DRM_IOCTL_PANTHOR_VM_BIND + */ +struct drm_panthor_vm_bind { + /** @vm_id: VM targeted by the bind request. */ + __u32 vm_id; + + /** @flags: Combination of drm_panthor_vm_bind_flags flags. */ + __u32 flags; + + /** @ops: Array of struct drm_panthor_vm_bind_op bind operations. */ + struct drm_panthor_obj_array ops; +}; + +/** + * enum drm_panthor_vm_state - VM states. + */ +enum drm_panthor_vm_state { + /** + * @DRM_PANTHOR_VM_STATE_USABLE: VM is usable. + * + * New VM operations will be accepted on this VM. + */ + DRM_PANTHOR_VM_STATE_USABLE, + + /** + * @DRM_PANTHOR_VM_STATE_UNUSABLE: VM is unusable. + * + * Something put the VM in an unusable state (like an asynchronous + * VM_BIND request failing for any reason). + * + * Once the VM is in this state, all new MAP operations will be + * rejected, and any GPU job targeting this VM will fail. + * UNMAP operations are still accepted. + * + * The only way to recover from an unusable VM is to create a new + * VM, and destroy the old one. + */ + DRM_PANTHOR_VM_STATE_UNUSABLE, +}; + +/** + * struct drm_panthor_vm_get_state - Get VM state. + */ +struct drm_panthor_vm_get_state { + /** @vm_id: VM targeted by the get_state request. */ + __u32 vm_id; + + /** + * @state: state returned by the driver. + * + * Must be one of the enum drm_panthor_vm_state values. + */ + __u32 state; +}; + +/** + * enum drm_panthor_bo_flags - Buffer object flags, passed at creation time. + */ +enum drm_panthor_bo_flags { + /** @DRM_PANTHOR_BO_NO_MMAP: The buffer object will never be CPU-mapped + in userspace. */ + DRM_PANTHOR_BO_NO_MMAP = (1 << 0), +}; + +/** + * struct drm_panthor_bo_create - Arguments passed to + * DRM_IOCTL_PANTHOR_BO_CREATE. + */ +struct drm_panthor_bo_create { + /** + * @size: Requested size for the object + * + * The (page-aligned) allocated size for the object will be returned. + */ + __u64 size; + + /** + * @flags: Flags. Must be a combination of drm_panthor_bo_flags flags. + */ + __u32 flags; + + /** + * @exclusive_vm_id: Exclusive VM this buffer object will be mapped to. + * + * If not zero, the field must refer to a valid VM ID, and implies that: + * - the buffer object will only ever be bound to that VM + * - cannot be exported as a PRIME fd + */ + __u32 exclusive_vm_id; + + /** + * @handle: Returned handle for the object. + * + * Object handles are nonzero. + */ + __u32 handle; + + /** @pad: MBZ. */ + __u32 pad; +}; + +/** + * struct drm_panthor_bo_mmap_offset - Arguments passed to + * DRM_IOCTL_PANTHOR_BO_MMAP_OFFSET. + */ +struct drm_panthor_bo_mmap_offset { + /** @handle: Handle of the object we want an mmap offset for. */ + __u32 handle; + + /** @pad: MBZ. */ + __u32 pad; + + /** @offset: The fake offset to use for subsequent mmap calls. */ + __u64 offset; +}; + +/** + * struct drm_panthor_queue_create - Queue creation arguments. + */ +struct drm_panthor_queue_create { + /** + * @priority: Defines the priority of queues inside a group. Goes from 0 + * to 15, 15 being the highest priority. + */ + __u8 priority; + + /** @pad: Padding fields, MBZ. */ + __u8 pad[3]; + + /** @ringbuf_size: Size of the ring buffer to allocate to this queue. */ + __u32 ringbuf_size; +}; + +/** + * enum drm_panthor_group_priority - Scheduling group priority + */ +enum drm_panthor_group_priority { + /** @PANTHOR_GROUP_PRIORITY_LOW: Low priority group. */ + PANTHOR_GROUP_PRIORITY_LOW = 0, + + /** @PANTHOR_GROUP_PRIORITY_MEDIUM: Medium priority group. */ + PANTHOR_GROUP_PRIORITY_MEDIUM, + + /** + * @PANTHOR_GROUP_PRIORITY_HIGH: High priority group. + * + * Requires CAP_SYS_NICE or DRM_MASTER. + */ + PANTHOR_GROUP_PRIORITY_HIGH, + + /** + * @PANTHOR_GROUP_PRIORITY_REALTIME: Realtime priority group. + * + * Requires CAP_SYS_NICE or DRM_MASTER. + */ + PANTHOR_GROUP_PRIORITY_REALTIME, +}; + +/** + * struct drm_panthor_group_create - Arguments passed to + * DRM_IOCTL_PANTHOR_GROUP_CREATE + */ +struct drm_panthor_group_create { + /** @queues: Array of drm_panthor_queue_create elements. */ + struct drm_panthor_obj_array queues; + + /** + * @max_compute_cores: Maximum number of cores that can be used by + * compute jobs across CS queues bound to this group. + * + * Must be less or equal to the number of bits set in + * @compute_core_mask. + */ + __u8 max_compute_cores; + + /** + * @max_fragment_cores: Maximum number of cores that can be used by + * fragment jobs across CS queues bound to this group. + * + * Must be less or equal to the number of bits set in + * @fragment_core_mask. + */ + __u8 max_fragment_cores; + + /** + * @max_tiler_cores: Maximum number of tilers that can be used by tiler + * jobs across CS queues bound to this group. + * + * Must be less or equal to the number of bits set in @tiler_core_mask. + */ + __u8 max_tiler_cores; + + /** @priority: Group priority (see enum drm_panthor_group_priority). */ + __u8 priority; + + /** @pad: Padding field, MBZ. */ + __u32 pad; + + /** + * @compute_core_mask: Mask encoding cores that can be used for compute + * jobs. + * + * This field must have at least @max_compute_cores bits set. + * + * The bits set here should also be set in + * drm_panthor_gpu_info::shader_present. + */ + __u64 compute_core_mask; + + /** + * @fragment_core_mask: Mask encoding cores that can be used for + * fragment jobs. + * + * This field must have at least @max_fragment_cores bits set. + * + * The bits set here should also be set in + * drm_panthor_gpu_info::shader_present. + */ + __u64 fragment_core_mask; + + /** + * @tiler_core_mask: Mask encoding cores that can be used for tiler + * jobs. + * + * This field must have at least @max_tiler_cores bits set. + * + * The bits set here should also be set in + * drm_panthor_gpu_info::tiler_present. + */ + __u64 tiler_core_mask; + + /** + * @vm_id: VM ID to bind this group to. + * + * All submission to queues bound to this group will use this VM. + */ + __u32 vm_id; + + /** + * @group_handle: Returned group handle. Passed back when submitting + * jobs or destroying a group. + */ + __u32 group_handle; +}; + +/** + * struct drm_panthor_group_destroy - Arguments passed to + * DRM_IOCTL_PANTHOR_GROUP_DESTROY + */ +struct drm_panthor_group_destroy { + /** @group_handle: Group to destroy */ + __u32 group_handle; + + /** @pad: Padding field, MBZ. */ + __u32 pad; +}; + +/** + * struct drm_panthor_queue_submit - Job submission arguments. + * + * This is describing the userspace command stream to call from the kernel + * command stream ring-buffer. Queue submission is always part of a group + * submission, taking one or more jobs to submit to the underlying queues. + */ +struct drm_panthor_queue_submit { + /** @queue_index: Index of the queue inside a group. */ + __u32 queue_index; + + /** + * @stream_size: Size of the command stream to execute. + * + * Must be 64-bit/8-byte aligned (the size of a CS instruction) + * + * Can be zero if stream_addr is zero too. + * + * When the stream size is zero, the queue submit serves as a + * synchronization point. + */ + __u32 stream_size; + + /** + * @stream_addr: GPU address of the command stream to execute. + * + * Must be aligned on 64-byte. + * + * Can be zero is stream_size is zero too. + */ + __u64 stream_addr; + + /** + * @latest_flush: FLUSH_ID read at the time the stream was built. + * + * This allows cache flush elimination for the automatic + * flush+invalidate(all) done at submission time, which is needed to + * ensure the GPU doesn't get garbage when reading the indirect command + * stream buffers. If you want the cache flush to happen + * unconditionally, pass a zero here. + * + * Ignored when stream_size is zero. + */ + __u32 latest_flush; + + /** @pad: MBZ. */ + __u32 pad; + + /** @syncs: Array of struct drm_panthor_sync_op sync operations. */ + struct drm_panthor_obj_array syncs; +}; + +/** + * struct drm_panthor_group_submit - Arguments passed to + * DRM_IOCTL_PANTHOR_GROUP_SUBMIT + */ +struct drm_panthor_group_submit { + /** @group_handle: Handle of the group to queue jobs to. */ + __u32 group_handle; + + /** @pad: MBZ. */ + __u32 pad; + + /** @queue_submits: Array of drm_panthor_queue_submit objects. */ + struct drm_panthor_obj_array queue_submits; +}; + +/** + * enum drm_panthor_group_state_flags - Group state flags + */ +enum drm_panthor_group_state_flags { + /** + * @DRM_PANTHOR_GROUP_STATE_TIMEDOUT: Group had unfinished jobs. + * + * When a group ends up with this flag set, no jobs can be submitted to + * its queues. + */ + DRM_PANTHOR_GROUP_STATE_TIMEDOUT = 1 << 0, + + /** + * @DRM_PANTHOR_GROUP_STATE_FATAL_FAULT: Group had fatal faults. + * + * When a group ends up with this flag set, no jobs can be submitted to + * its queues. + */ + DRM_PANTHOR_GROUP_STATE_FATAL_FAULT = 1 << 1, + + /** + * @DRM_PANTHOR_GROUP_STATE_INNOCENT: Group was killed during a reset + * caused by other groups. + * + * This flag can only be set if DRM_PANTHOR_GROUP_STATE_TIMEDOUT is set + * and DRM_PANTHOR_GROUP_STATE_FATAL_FAULT is not. + */ + DRM_PANTHOR_GROUP_STATE_INNOCENT = 1 << 2, +}; + +/** + * struct drm_panthor_group_get_state - Arguments passed to + * DRM_IOCTL_PANTHOR_GROUP_GET_STATE + * + * Used to query the state of a group and decide whether a new group should be + * created to replace it. + */ +struct drm_panthor_group_get_state { + /** @group_handle: Handle of the group to query state on */ + __u32 group_handle; + + /** + * @state: Combination of DRM_PANTHOR_GROUP_STATE_* flags encoding the + * group state. + */ + __u32 state; + + /** @fatal_queues: Bitmask of queues that faced fatal faults. */ + __u32 fatal_queues; + + /** @pad: MBZ */ + __u32 pad; +}; + +/** + * struct drm_panthor_tiler_heap_create - Arguments passed to + * DRM_IOCTL_PANTHOR_TILER_HEAP_CREATE + */ +struct drm_panthor_tiler_heap_create { + /** @vm_id: VM ID the tiler heap should be mapped to */ + __u32 vm_id; + + /** @initial_chunk_count: Initial number of chunks to allocate. Must be + * at least one. */ + __u32 initial_chunk_count; + + /** + * @chunk_size: Chunk size. + * + * Must be page-aligned and lie in the [128k:8M] range. + */ + __u32 chunk_size; + + /** + * @max_chunks: Maximum number of chunks that can be allocated. + * + * Must be at least @initial_chunk_count. + */ + __u32 max_chunks; + + /** + * @target_in_flight: Maximum number of in-flight render passes. + * + * If the heap has more than tiler jobs in-flight, the FW will wait for + * render passes to finish before queuing new tiler jobs. + */ + __u32 target_in_flight; + + /** @handle: Returned heap handle. Passed back to DESTROY_TILER_HEAP. */ + __u32 handle; + + /** @tiler_heap_ctx_gpu_va: Returned heap GPU virtual address returned + */ + __u64 tiler_heap_ctx_gpu_va; + + /** + * @first_heap_chunk_gpu_va: First heap chunk. + * + * The tiler heap is formed of heap chunks forming a single-link list. + * This is the first element in the list. + */ + __u64 first_heap_chunk_gpu_va; +}; + +/** + * struct drm_panthor_tiler_heap_destroy - Arguments passed to + * DRM_IOCTL_PANTHOR_TILER_HEAP_DESTROY + */ +struct drm_panthor_tiler_heap_destroy { + /** + * @handle: Handle of the tiler heap to destroy. + * + * Must be a valid heap handle returned by + * DRM_IOCTL_PANTHOR_TILER_HEAP_CREATE. + */ + __u32 handle; + + /** @pad: Padding field, MBZ. */ + __u32 pad; +}; + +/** + * struct drm_panthor_bo_set_label - Arguments passed to + * DRM_IOCTL_PANTHOR_BO_SET_LABEL + */ +struct drm_panthor_bo_set_label { + /** @handle: Handle of the buffer object to label. */ + __u32 handle; + + /** @pad: MBZ. */ + __u32 pad; + + /** + * @label: User pointer to a NUL-terminated string + * + * Length cannot be greater than 4096 + */ + __u64 label; +}; + +/** + * struct drm_panthor_set_user_mmio_offset - Arguments passed to + * DRM_IOCTL_PANTHOR_SET_USER_MMIO_OFFSET + * + * This ioctl is only really useful if you want to support userspace + * CPU emulation environments where the size of an unsigned long differs + * between the host and the guest architectures. + */ +struct drm_panthor_set_user_mmio_offset { + /** + * @offset: User MMIO offset to use. + * + * Must be either DRM_PANTHOR_USER_MMIO_OFFSET_32BIT or + * DRM_PANTHOR_USER_MMIO_OFFSET_64BIT. + * + * Use DRM_PANTHOR_USER_MMIO_OFFSET (which selects OFFSET_32BIT or + * OFFSET_64BIT based on the size of an unsigned long) unless you + * have a very good reason to overrule this decision. + */ + __u64 offset; +}; + +/** + * DRM_IOCTL_PANTHOR() - Build a Panthor IOCTL number + * @__access: Access type. Must be R, W or RW. + * @__id: One of the DRM_PANTHOR_xxx id. + * @__type: Suffix of the type being passed to the IOCTL. + * + * Don't use this macro directly, use the DRM_IOCTL_PANTHOR_xxx + * values instead. + * + * Return: An IOCTL number to be passed to ioctl() from userspace. + */ +#define DRM_IOCTL_PANTHOR(__access, __id, __type) \ + DRM_IO##__access(DRM_COMMAND_BASE + DRM_PANTHOR_##__id, \ + struct drm_panthor_##__type) + +enum { + DRM_IOCTL_PANTHOR_DEV_QUERY = + DRM_IOCTL_PANTHOR(WR, DEV_QUERY, dev_query), + DRM_IOCTL_PANTHOR_VM_CREATE = + DRM_IOCTL_PANTHOR(WR, VM_CREATE, vm_create), + DRM_IOCTL_PANTHOR_VM_DESTROY = + DRM_IOCTL_PANTHOR(WR, VM_DESTROY, vm_destroy), + DRM_IOCTL_PANTHOR_VM_BIND = DRM_IOCTL_PANTHOR(WR, VM_BIND, vm_bind), + DRM_IOCTL_PANTHOR_VM_GET_STATE = + DRM_IOCTL_PANTHOR(WR, VM_GET_STATE, vm_get_state), + DRM_IOCTL_PANTHOR_BO_CREATE = + DRM_IOCTL_PANTHOR(WR, BO_CREATE, bo_create), + DRM_IOCTL_PANTHOR_BO_MMAP_OFFSET = + DRM_IOCTL_PANTHOR(WR, BO_MMAP_OFFSET, bo_mmap_offset), + DRM_IOCTL_PANTHOR_GROUP_CREATE = + DRM_IOCTL_PANTHOR(WR, GROUP_CREATE, group_create), + DRM_IOCTL_PANTHOR_GROUP_DESTROY = + DRM_IOCTL_PANTHOR(WR, GROUP_DESTROY, group_destroy), + DRM_IOCTL_PANTHOR_GROUP_SUBMIT = + DRM_IOCTL_PANTHOR(WR, GROUP_SUBMIT, group_submit), + DRM_IOCTL_PANTHOR_GROUP_GET_STATE = + DRM_IOCTL_PANTHOR(WR, GROUP_GET_STATE, group_get_state), + DRM_IOCTL_PANTHOR_TILER_HEAP_CREATE = + DRM_IOCTL_PANTHOR(WR, TILER_HEAP_CREATE, tiler_heap_create), + DRM_IOCTL_PANTHOR_TILER_HEAP_DESTROY = + DRM_IOCTL_PANTHOR(WR, TILER_HEAP_DESTROY, tiler_heap_destroy), + DRM_IOCTL_PANTHOR_BO_SET_LABEL = + DRM_IOCTL_PANTHOR(WR, BO_SET_LABEL, bo_set_label), + DRM_IOCTL_PANTHOR_SET_USER_MMIO_OFFSET = DRM_IOCTL_PANTHOR( + WR, SET_USER_MMIO_OFFSET, set_user_mmio_offset), +}; + +#if defined(__cplusplus) +} +#endif + +#endif /* _PANTHOR_DRM_H_ */ diff --git a/include/arch/x86_64/drm/pvr_drm.h b/include/arch/x86_64/drm/pvr_drm.h new file mode 100644 index 00000000..fdaac8dd --- /dev/null +++ b/include/arch/x86_64/drm/pvr_drm.h @@ -0,0 +1,1331 @@ +/* SPDX-License-Identifier: (GPL-2.0-only WITH Linux-syscall-note) OR MIT */ +/* Copyright (c) 2023 Imagination Technologies Ltd. */ + +#ifndef PVR_DRM_UAPI_H +#define PVR_DRM_UAPI_H + +#include "drm.h" + +#include <linux/const.h> +#include <linux/types.h> + +#if defined(__cplusplus) +extern "C" { +#endif + +/** + * DOC: PowerVR UAPI + * + * The PowerVR IOCTL argument structs have a few limitations in place, in + * addition to the standard kernel restrictions: + * + * - All members must be type-aligned. + * - The overall struct must be padded to 64-bit alignment. + * - Explicit padding is almost always required. This takes the form of + * ``_padding_[x]`` members of sufficient size to pad to the next + * power-of-two alignment, where [x] is the offset into the struct in + * hexadecimal. Arrays are never used for alignment. Padding fields must be + * zeroed; this is always checked. + * - Unions may only appear as the last member of a struct. + * - Individual union members may grow in the future. The space between the + * end of a union member and the end of its containing union is considered + * "implicit padding" and must be zeroed. This is always checked. + * + * In addition to the IOCTL argument structs, the PowerVR UAPI makes use of + * DEV_QUERY argument structs. These are used to fetch information about the + * device and runtime. These structs are subject to the same rules set out + * above. + */ + +/** + * struct drm_pvr_obj_array - Container used to pass arrays of objects + * + * It is not unusual to have to extend objects to pass new parameters, and the + * DRM ioctl infrastructure is supporting that by padding ioctl arguments with + * zeros when the data passed by userspace is smaller than the struct defined in + * the drm_ioctl_desc, thus keeping things backward compatible. This type is + * just applying the same concepts to indirect objects passed through arrays + * referenced from the main ioctl arguments structure: the stride basically + * defines the size of the object passed by userspace, which allows the kernel + * driver to pad with zeros when it's smaller than the size of the object it + * expects. + * + * Use ``DRM_PVR_OBJ_ARRAY()`` to fill object array fields, unless you + * have a very good reason not to. + */ +struct drm_pvr_obj_array { + /** @stride: Stride of object struct. Used for versioning. */ + __u32 stride; + + /** @count: Number of objects in the array. */ + __u32 count; + + /** @array: User pointer to an array of objects. */ + __u64 array; +}; + +/** + * DRM_PVR_OBJ_ARRAY() - Helper macro for filling &struct drm_pvr_obj_array. + * @cnt: Number of elements pointed to py @ptr. + * @ptr: Pointer to start of a C array. + * + * Return: Literal of type &struct drm_pvr_obj_array. + */ +#define DRM_PVR_OBJ_ARRAY(cnt, ptr) \ + { .stride = sizeof((ptr)[0]), \ + .count = (cnt), \ + .array = (__u64)(uintptr_t)(ptr) } + +/** + * DOC: PowerVR IOCTL interface + */ + +/** + * PVR_IOCTL() - Build a PowerVR IOCTL number + * @_ioctl: An incrementing id for this IOCTL. Added to %DRM_COMMAND_BASE. + * @_mode: Must be one of %DRM_IOR, %DRM_IOW or %DRM_IOWR. + * @_data: The type of the args struct passed by this IOCTL. + * + * The struct referred to by @_data must have a ``drm_pvr_ioctl_`` prefix and an + * ``_args suffix``. They are therefore omitted from @_data. + * + * This should only be used to build the constants described below; it should + * never be used to call an IOCTL directly. + * + * Return: An IOCTL number to be passed to ioctl() from userspace. + */ +#define PVR_IOCTL(_ioctl, _mode, _data) \ + _mode(DRM_COMMAND_BASE + (_ioctl), struct drm_pvr_ioctl_##_data##_args) + +#define DRM_IOCTL_PVR_DEV_QUERY PVR_IOCTL(0x00, DRM_IOWR, dev_query) +#define DRM_IOCTL_PVR_CREATE_BO PVR_IOCTL(0x01, DRM_IOWR, create_bo) +#define DRM_IOCTL_PVR_GET_BO_MMAP_OFFSET \ + PVR_IOCTL(0x02, DRM_IOWR, get_bo_mmap_offset) +#define DRM_IOCTL_PVR_CREATE_VM_CONTEXT \ + PVR_IOCTL(0x03, DRM_IOWR, create_vm_context) +#define DRM_IOCTL_PVR_DESTROY_VM_CONTEXT \ + PVR_IOCTL(0x04, DRM_IOW, destroy_vm_context) +#define DRM_IOCTL_PVR_VM_MAP PVR_IOCTL(0x05, DRM_IOW, vm_map) +#define DRM_IOCTL_PVR_VM_UNMAP PVR_IOCTL(0x06, DRM_IOW, vm_unmap) +#define DRM_IOCTL_PVR_CREATE_CONTEXT PVR_IOCTL(0x07, DRM_IOWR, create_context) +#define DRM_IOCTL_PVR_DESTROY_CONTEXT PVR_IOCTL(0x08, DRM_IOW, destroy_context) +#define DRM_IOCTL_PVR_CREATE_FREE_LIST \ + PVR_IOCTL(0x09, DRM_IOWR, create_free_list) +#define DRM_IOCTL_PVR_DESTROY_FREE_LIST \ + PVR_IOCTL(0x0a, DRM_IOW, destroy_free_list) +#define DRM_IOCTL_PVR_CREATE_HWRT_DATASET \ + PVR_IOCTL(0x0b, DRM_IOWR, create_hwrt_dataset) +#define DRM_IOCTL_PVR_DESTROY_HWRT_DATASET \ + PVR_IOCTL(0x0c, DRM_IOW, destroy_hwrt_dataset) +#define DRM_IOCTL_PVR_SUBMIT_JOBS PVR_IOCTL(0x0d, DRM_IOW, submit_jobs) + +/** + * DOC: PowerVR IOCTL DEV_QUERY interface + */ + +/** + * struct drm_pvr_dev_query_gpu_info - Container used to fetch information about + * the graphics processor. + * + * When fetching this type &struct drm_pvr_ioctl_dev_query_args.type must be set + * to %DRM_PVR_DEV_QUERY_GPU_INFO_GET. + */ +struct drm_pvr_dev_query_gpu_info { + /** + * @gpu_id: GPU identifier. + * + * For all currently supported GPUs this is the BVNC encoded as a 64-bit + * value as follows: + * + * +--------+--------+--------+-------+ + * | 63..48 | 47..32 | 31..16 | 15..0 | + * +========+========+========+=======+ + * | B | V | N | C | + * +--------+--------+--------+-------+ + */ + __u64 gpu_id; + + /** + * @num_phantoms: Number of Phantoms present. + */ + __u32 num_phantoms; + + /** @_padding_c: Reserved. This field must be zeroed. */ + __u32 _padding_c; +}; + +/** + * struct drm_pvr_dev_query_runtime_info - Container used to fetch information + * about the graphics runtime. + * + * When fetching this type &struct drm_pvr_ioctl_dev_query_args.type must be set + * to %DRM_PVR_DEV_QUERY_RUNTIME_INFO_GET. + */ +struct drm_pvr_dev_query_runtime_info { + /** + * @free_list_min_pages: Minimum allowed free list size, + * in PM physical pages. + */ + __u64 free_list_min_pages; + + /** + * @free_list_max_pages: Maximum allowed free list size, + * in PM physical pages. + */ + __u64 free_list_max_pages; + + /** + * @common_store_alloc_region_size: Size of the Allocation + * Region within the Common Store used for coefficient and shared + * registers, in dwords. + */ + __u32 common_store_alloc_region_size; + + /** + * @common_store_partition_space_size: Size of the + * Partition Space within the Common Store for output buffers, in + * dwords. + */ + __u32 common_store_partition_space_size; + + /** + * @max_coeffs: Maximum coefficients, in dwords. + */ + __u32 max_coeffs; + + /** + * @cdm_max_local_mem_size_regs: Maximum amount of local + * memory available to a compute kernel, in dwords. + */ + __u32 cdm_max_local_mem_size_regs; +}; + +/** + * struct drm_pvr_dev_query_quirks - Container used to fetch information about + * hardware fixes for which the device may require support in the user mode + * driver. + * + * When fetching this type &struct drm_pvr_ioctl_dev_query_args.type must be set + * to %DRM_PVR_DEV_QUERY_QUIRKS_GET. + */ +struct drm_pvr_dev_query_quirks { + /** + * @quirks: A userspace address for the hardware quirks __u32 array. + * + * The first @musthave_count items in the list are quirks that the + * client must support for this device. If userspace does not support + * all these quirks then functionality is not guaranteed and client + * initialisation must fail. + * The remaining quirks in the list affect userspace and the kernel or + * firmware. They are disabled by default and require userspace to + * opt-in. The opt-in mechanism depends on the quirk. + */ + __u64 quirks; + + /** @count: Length of @quirks (number of __u32). */ + __u16 count; + + /** + * @musthave_count: The number of entries in @quirks that are + * mandatory, starting at index 0. + */ + __u16 musthave_count; + + /** @_padding_c: Reserved. This field must be zeroed. */ + __u32 _padding_c; +}; + +/** + * struct drm_pvr_dev_query_enhancements - Container used to fetch information + * about optional enhancements supported by the device that require support in + * the user mode driver. + * + * When fetching this type &struct drm_pvr_ioctl_dev_query_args.type must be set + * to %DRM_PVR_DEV_ENHANCEMENTS_GET. + */ +struct drm_pvr_dev_query_enhancements { + /** + * @enhancements: A userspace address for the hardware enhancements + * __u32 array. + * + * These enhancements affect userspace and the kernel or firmware. They + * are disabled by default and require userspace to opt-in. The opt-in + * mechanism depends on the enhancement. + */ + __u64 enhancements; + + /** @count: Length of @enhancements (number of __u32). */ + __u16 count; + + /** @_padding_a: Reserved. This field must be zeroed. */ + __u16 _padding_a; + + /** @_padding_c: Reserved. This field must be zeroed. */ + __u32 _padding_c; +}; + +/** + * enum drm_pvr_heap_id - Array index for heap info data returned by + * %DRM_PVR_DEV_QUERY_HEAP_INFO_GET. + * + * For compatibility reasons all indices will be present in the returned array, + * however some heaps may not be present. These are indicated where + * &struct drm_pvr_heap.size is set to zero. + */ +enum drm_pvr_heap_id { + /** @DRM_PVR_HEAP_GENERAL: General purpose heap. */ + DRM_PVR_HEAP_GENERAL = 0, + /** @DRM_PVR_HEAP_PDS_CODE_DATA: PDS code and data heap. */ + DRM_PVR_HEAP_PDS_CODE_DATA, + /** @DRM_PVR_HEAP_USC_CODE: USC code heap. */ + DRM_PVR_HEAP_USC_CODE, + /** @DRM_PVR_HEAP_RGNHDR: Region header heap. Only used if GPU has + BRN63142. */ + DRM_PVR_HEAP_RGNHDR, + /** @DRM_PVR_HEAP_VIS_TEST: Visibility test heap. */ + DRM_PVR_HEAP_VIS_TEST, + /** @DRM_PVR_HEAP_TRANSFER_FRAG: Transfer fragment heap. */ + DRM_PVR_HEAP_TRANSFER_FRAG, + + /** + * @DRM_PVR_HEAP_COUNT: The number of heaps returned by + * %DRM_PVR_DEV_QUERY_HEAP_INFO_GET. + * + * More heaps may be added, so this also serves as the copy limit when + * sent by the caller. + */ + DRM_PVR_HEAP_COUNT + /* Please only add additional heaps above DRM_PVR_HEAP_COUNT! */ +}; + +/** + * struct drm_pvr_heap - Container holding information about a single heap. + * + * This will always be fetched as an array. + */ +struct drm_pvr_heap { + /** @base: Base address of heap. */ + __u64 base; + + /** @size: Size of heap, in bytes. Will be 0 if the heap is not present. + */ + __u64 size; + + /** @flags: Flags for this heap. Currently always 0. */ + __u32 flags; + + /** @page_size_log2: Log2 of page size. */ + __u32 page_size_log2; +}; + +/** + * struct drm_pvr_dev_query_heap_info - Container used to fetch information + * about heaps supported by the device driver. + * + * Please note all driver-supported heaps will be returned up to &heaps.count. + * Some heaps will not be present in all devices, which will be indicated by + * &struct drm_pvr_heap.size being set to zero. + * + * When fetching this type &struct drm_pvr_ioctl_dev_query_args.type must be set + * to %DRM_PVR_DEV_QUERY_HEAP_INFO_GET. + */ +struct drm_pvr_dev_query_heap_info { + /** + * @heaps: Array of &struct drm_pvr_heap. If pointer is NULL, the count + * and stride will be updated with those known to the driver version, to + * facilitate allocation by the caller. + */ + struct drm_pvr_obj_array heaps; +}; + +/** + * enum drm_pvr_static_data_area_usage - Array index for static data area info + * returned by %DRM_PVR_DEV_QUERY_STATIC_DATA_AREAS_GET. + * + * For compatibility reasons all indices will be present in the returned array, + * however some areas may not be present. These are indicated where + * &struct drm_pvr_static_data_area.size is set to zero. + */ +enum drm_pvr_static_data_area_usage { + /** + * @DRM_PVR_STATIC_DATA_AREA_EOT: End of Tile PDS program code segment. + * + * The End of Tile PDS task runs at completion of a tile during a + * fragment job, and is responsible for emitting the tile to the Pixel + * Back End. + */ + DRM_PVR_STATIC_DATA_AREA_EOT = 0, + + /** + * @DRM_PVR_STATIC_DATA_AREA_FENCE: MCU fence area, used during cache + * flush and invalidation. + * + * This must point to valid physical memory but the contents otherwise + * are not used. + */ + DRM_PVR_STATIC_DATA_AREA_FENCE, + + /** + * @DRM_PVR_STATIC_DATA_AREA_VDM_SYNC: VDM sync program. + * + * The VDM sync program is used to synchronise multiple areas of the GPU + * hardware. + */ + DRM_PVR_STATIC_DATA_AREA_VDM_SYNC, + + /** + * @DRM_PVR_STATIC_DATA_AREA_YUV_CSC: YUV coefficients. + * + * Area contains up to 16 slots with stride of 64 bytes. Each is a 3x4 + * matrix of u16 fixed point numbers, with 1 sign bit, 2 integer bits + * and 13 fractional bits. + * + * The slots are : + * 0 = VK_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY_KHR + * 1 = VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_IDENTITY_KHR (full range) + * 2 = VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_IDENTITY_KHR (conformant + * range) 3 = VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_709_KHR (full + * range) 4 = VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_709_KHR + * (conformant range) 5 = + * VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_601_KHR (full range) 6 = + * VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_601_KHR (conformant range) 7 + * = VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_2020_KHR (full range) 8 = + * VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_2020_KHR (conformant range) + * 9 = VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_601_KHR (conformant + * range, 10 bit) 10 = VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_709_KHR + * (conformant range, 10 bit) 11 = + * VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_2020_KHR (conformant range, + * 10 bit) 14 = Identity (biased) 15 = Identity + */ + DRM_PVR_STATIC_DATA_AREA_YUV_CSC, +}; + +/** + * struct drm_pvr_static_data_area - Container holding information about a + * single static data area. + * + * This will always be fetched as an array. + */ +struct drm_pvr_static_data_area { + /** + * @area_usage: Usage of static data area. + * See &enum drm_pvr_static_data_area_usage. + */ + __u16 area_usage; + + /** + * @location_heap_id: Array index of heap where this of static data + * area is located. This array is fetched using + * %DRM_PVR_DEV_QUERY_HEAP_INFO_GET. + */ + __u16 location_heap_id; + + /** @size: Size of static data area. Not present if set to zero. */ + __u32 size; + + /** @offset: Offset of static data area from start of heap. */ + __u64 offset; +}; + +/** + * struct drm_pvr_dev_query_static_data_areas - Container used to fetch + * information about the static data areas in heaps supported by the device + * driver. + * + * Please note all driver-supported static data areas will be returned up to + * &static_data_areas.count. Some will not be present for all devices which, + * will be indicated by &struct drm_pvr_static_data_area.size being set to zero. + * + * Further, some heaps will not be present either. See &struct + * drm_pvr_dev_query_heap_info. + * + * When fetching this type &struct drm_pvr_ioctl_dev_query_args.type must be set + * to %DRM_PVR_DEV_QUERY_STATIC_DATA_AREAS_GET. + */ +struct drm_pvr_dev_query_static_data_areas { + /** + * @static_data_areas: Array of &struct drm_pvr_static_data_area. If + * pointer is NULL, the count and stride will be updated with those + * known to the driver version, to facilitate allocation by the caller. + */ + struct drm_pvr_obj_array static_data_areas; +}; + +/** + * enum drm_pvr_dev_query - For use with &drm_pvr_ioctl_dev_query_args.type to + * indicate the type of the receiving container. + * + * Append only. Do not reorder. + */ +enum drm_pvr_dev_query { + /** + * @DRM_PVR_DEV_QUERY_GPU_INFO_GET: The dev query args contain a pointer + * to &struct drm_pvr_dev_query_gpu_info. + */ + DRM_PVR_DEV_QUERY_GPU_INFO_GET = 0, + + /** + * @DRM_PVR_DEV_QUERY_RUNTIME_INFO_GET: The dev query args contain a + * pointer to &struct drm_pvr_dev_query_runtime_info. + */ + DRM_PVR_DEV_QUERY_RUNTIME_INFO_GET, + + /** + * @DRM_PVR_DEV_QUERY_QUIRKS_GET: The dev query args contain a pointer + * to &struct drm_pvr_dev_query_quirks. + */ + DRM_PVR_DEV_QUERY_QUIRKS_GET, + + /** + * @DRM_PVR_DEV_QUERY_ENHANCEMENTS_GET: The dev query args contain a + * pointer to &struct drm_pvr_dev_query_enhancements. + */ + DRM_PVR_DEV_QUERY_ENHANCEMENTS_GET, + + /** + * @DRM_PVR_DEV_QUERY_HEAP_INFO_GET: The dev query args contain a + * pointer to &struct drm_pvr_dev_query_heap_info. + */ + DRM_PVR_DEV_QUERY_HEAP_INFO_GET, + + /** + * @DRM_PVR_DEV_QUERY_STATIC_DATA_AREAS_GET: The dev query args contain + * a pointer to &struct drm_pvr_dev_query_static_data_areas. + */ + DRM_PVR_DEV_QUERY_STATIC_DATA_AREAS_GET, +}; + +/** + * struct drm_pvr_ioctl_dev_query_args - Arguments for %DRM_IOCTL_PVR_DEV_QUERY. + */ +struct drm_pvr_ioctl_dev_query_args { + /** + * @type: Type of query and output struct. See &enum drm_pvr_dev_query. + */ + __u32 type; + + /** + * @size: Size of the receiving struct, see @type. + * + * After a successful call this will be updated to the written byte + * length. + * Can also be used to get the minimum byte length (see @pointer). + * This allows additional fields to be appended to the structs in + * future. + */ + __u32 size; + + /** + * @pointer: Pointer to struct @type. + * + * Must be large enough to contain @size bytes. + * If pointer is NULL, the expected size will be returned in the @size + * field, but no other data will be written. + */ + __u64 pointer; +}; + +/** + * DOC: PowerVR IOCTL CREATE_BO interface + */ + +/** + * DOC: Flags for CREATE_BO + * + * We use "device" to refer to the GPU here because of the ambiguity between CPU + * and GPU in some fonts. + * + * Device mapping options + * :DRM_PVR_BO_BYPASS_DEVICE_CACHE: Specify that device accesses to this + * memory will bypass the cache. This is used for buffers that will either be + * regularly updated by the CPU (eg free lists) or will be accessed only once + * and therefore isn't worth caching (eg partial render buffers). By default, + * the device flushes its memory caches after every job, so this is not normally + * required for coherency. + * :DRM_PVR_BO_PM_FW_PROTECT: Specify that only the Parameter Manager (PM) + * and/or firmware processor should be allowed to access this memory when mapped + * to the device. It is not valid to specify this flag with + * DRM_PVR_BO_ALLOW_CPU_USERSPACE_ACCESS. + * + * CPU mapping options + * :DRM_PVR_BO_ALLOW_CPU_USERSPACE_ACCESS: Allow userspace to map and access + * the contents of this memory. It is not valid to specify this flag with + * DRM_PVR_BO_PM_FW_PROTECT. + */ +#define DRM_PVR_BO_BYPASS_DEVICE_CACHE _BITULL(0) +#define DRM_PVR_BO_PM_FW_PROTECT _BITULL(1) +#define DRM_PVR_BO_ALLOW_CPU_USERSPACE_ACCESS _BITULL(2) +/* Bits 3..63 are reserved. */ + +#define DRM_PVR_BO_FLAGS_MASK \ + (DRM_PVR_BO_BYPASS_DEVICE_CACHE | DRM_PVR_BO_PM_FW_PROTECT | \ + DRM_PVR_BO_ALLOW_CPU_USERSPACE_ACCESS) + +/** + * struct drm_pvr_ioctl_create_bo_args - Arguments for %DRM_IOCTL_PVR_CREATE_BO + */ +struct drm_pvr_ioctl_create_bo_args { + /** + * @size: [IN] Size of buffer object to create. This must be page size + * aligned. + */ + __u64 size; + + /** + * @handle: [OUT] GEM handle of the new buffer object for use in + * userspace. + */ + __u32 handle; + + /** @_padding_c: Reserved. This field must be zeroed. */ + __u32 _padding_c; + + /** + * @flags: [IN] Options which will affect the behaviour of this + * creation operation and future mapping operations on the created + * object. This field must be a valid combination of ``DRM_PVR_BO_*`` + * values, with all bits marked as reserved set to zero. + */ + __u64 flags; +}; + +/** + * DOC: PowerVR IOCTL GET_BO_MMAP_OFFSET interface + */ + +/** + * struct drm_pvr_ioctl_get_bo_mmap_offset_args - Arguments for + * %DRM_IOCTL_PVR_GET_BO_MMAP_OFFSET + * + * Like other DRM drivers, the "mmap" IOCTL doesn't actually map any memory. + * Instead, it allocates a fake offset which refers to the specified buffer + * object. This offset can be used with a real mmap call on the DRM device + * itself. + */ +struct drm_pvr_ioctl_get_bo_mmap_offset_args { + /** @handle: [IN] GEM handle of the buffer object to be mapped. */ + __u32 handle; + + /** @_padding_4: Reserved. This field must be zeroed. */ + __u32 _padding_4; + + /** @offset: [OUT] Fake offset to use in the real mmap call. */ + __u64 offset; +}; + +/** + * DOC: PowerVR IOCTL CREATE_VM_CONTEXT and DESTROY_VM_CONTEXT interfaces + */ + +/** + * struct drm_pvr_ioctl_create_vm_context_args - Arguments for + * %DRM_IOCTL_PVR_CREATE_VM_CONTEXT + */ +struct drm_pvr_ioctl_create_vm_context_args { + /** @handle: [OUT] Handle for new VM context. */ + __u32 handle; + + /** @_padding_4: Reserved. This field must be zeroed. */ + __u32 _padding_4; +}; + +/** + * struct drm_pvr_ioctl_destroy_vm_context_args - Arguments for + * %DRM_IOCTL_PVR_DESTROY_VM_CONTEXT + */ +struct drm_pvr_ioctl_destroy_vm_context_args { + /** + * @handle: [IN] Handle for VM context to be destroyed. + */ + __u32 handle; + + /** @_padding_4: Reserved. This field must be zeroed. */ + __u32 _padding_4; +}; + +/** + * DOC: PowerVR IOCTL VM_MAP and VM_UNMAP interfaces + * + * The VM UAPI allows userspace to create buffer object mappings in GPU virtual + * address space. + * + * The client is responsible for managing GPU address space. It should allocate + * mappings within the heaps returned by %DRM_PVR_DEV_QUERY_HEAP_INFO_GET. + * + * %DRM_IOCTL_PVR_VM_MAP creates a new mapping. The client provides the target + * virtual address for the mapping. Size and offset within the mapped buffer + * object can be specified, so the client can partially map a buffer. + * + * %DRM_IOCTL_PVR_VM_UNMAP removes a mapping. The entire mapping will be removed + * from GPU address space only if the size of the mapping matches that known to + * the driver. + */ + +/** + * struct drm_pvr_ioctl_vm_map_args - Arguments for %DRM_IOCTL_PVR_VM_MAP. + */ +struct drm_pvr_ioctl_vm_map_args { + /** + * @vm_context_handle: [IN] Handle for VM context for this mapping to + * exist in. + */ + __u32 vm_context_handle; + + /** @flags: [IN] Flags which affect this mapping. Currently always 0. */ + __u32 flags; + + /** + * @device_addr: [IN] Requested device-virtual address for the mapping. + * This must be non-zero and aligned to the device page size for the + * heap containing the requested address. It is an error to specify an + * address which is not contained within one of the heaps returned by + * %DRM_PVR_DEV_QUERY_HEAP_INFO_GET. + */ + __u64 device_addr; + + /** + * @handle: [IN] Handle of the target buffer object. This must be a + * valid handle returned by %DRM_IOCTL_PVR_CREATE_BO. + */ + __u32 handle; + + /** @_padding_14: Reserved. This field must be zeroed. */ + __u32 _padding_14; + + /** + * @offset: [IN] Offset into the target bo from which to begin the + * mapping. + */ + __u64 offset; + + /** + * @size: [IN] Size of the requested mapping. Must be aligned to + * the device page size for the heap containing the requested address, + * as well as the host page size. When added to @device_addr, the + * result must not overflow the heap which contains @device_addr (i.e. + * the range specified by @device_addr and @size must be completely + * contained within a single heap specified by + * %DRM_PVR_DEV_QUERY_HEAP_INFO_GET). + */ + __u64 size; +}; + +/** + * struct drm_pvr_ioctl_vm_unmap_args - Arguments for %DRM_IOCTL_PVR_VM_UNMAP. + */ +struct drm_pvr_ioctl_vm_unmap_args { + /** + * @vm_context_handle: [IN] Handle for VM context that this mapping + * exists in. + */ + __u32 vm_context_handle; + + /** @_padding_4: Reserved. This field must be zeroed. */ + __u32 _padding_4; + + /** + * @device_addr: [IN] Device-virtual address at the start of the target + * mapping. This must be non-zero. + */ + __u64 device_addr; + + /** + * @size: Size in bytes of the target mapping. This must be non-zero. + */ + __u64 size; +}; + +/** + * DOC: PowerVR IOCTL CREATE_CONTEXT and DESTROY_CONTEXT interfaces + */ + +/** + * enum drm_pvr_ctx_priority - Arguments for + * &drm_pvr_ioctl_create_context_args.priority + */ +enum drm_pvr_ctx_priority { + /** @DRM_PVR_CTX_PRIORITY_LOW: Priority below normal. */ + DRM_PVR_CTX_PRIORITY_LOW = -512, + + /** @DRM_PVR_CTX_PRIORITY_NORMAL: Normal priority. */ + DRM_PVR_CTX_PRIORITY_NORMAL = 0, + + /** + * @DRM_PVR_CTX_PRIORITY_HIGH: Priority above normal. + * Note this requires ``CAP_SYS_NICE`` or ``DRM_MASTER``. + */ + DRM_PVR_CTX_PRIORITY_HIGH = 512, +}; + +/** + * enum drm_pvr_ctx_type - Arguments for + * &struct drm_pvr_ioctl_create_context_args.type + */ +enum drm_pvr_ctx_type { + /** + * @DRM_PVR_CTX_TYPE_RENDER: Render context. + */ + DRM_PVR_CTX_TYPE_RENDER = 0, + + /** + * @DRM_PVR_CTX_TYPE_COMPUTE: Compute context. + */ + DRM_PVR_CTX_TYPE_COMPUTE, + + /** + * @DRM_PVR_CTX_TYPE_TRANSFER_FRAG: Transfer context for fragment data + * master. + */ + DRM_PVR_CTX_TYPE_TRANSFER_FRAG, +}; + +/** + * struct drm_pvr_ioctl_create_context_args - Arguments for + * %DRM_IOCTL_PVR_CREATE_CONTEXT + */ +struct drm_pvr_ioctl_create_context_args { + /** + * @type: [IN] Type of context to create. + * + * This must be one of the values defined by &enum drm_pvr_ctx_type. + */ + __u32 type; + + /** @flags: [IN] Flags for context. */ + __u32 flags; + + /** + * @priority: [IN] Priority of new context. + * + * This must be one of the values defined by &enum drm_pvr_ctx_priority. + */ + __s32 priority; + + /** @handle: [OUT] Handle for new context. */ + __u32 handle; + + /** + * @static_context_state: [IN] Pointer to static context state stream. + */ + __u64 static_context_state; + + /** + * @static_context_state_len: [IN] Length of static context state, in + * bytes. + */ + __u32 static_context_state_len; + + /** + * @vm_context_handle: [IN] Handle for VM context that this context is + * associated with. + */ + __u32 vm_context_handle; + + /** + * @callstack_addr: [IN] Address for initial call stack pointer. Only + * valid if @type is %DRM_PVR_CTX_TYPE_RENDER, otherwise must be 0. + */ + __u64 callstack_addr; +}; + +/** + * struct drm_pvr_ioctl_destroy_context_args - Arguments for + * %DRM_IOCTL_PVR_DESTROY_CONTEXT + */ +struct drm_pvr_ioctl_destroy_context_args { + /** + * @handle: [IN] Handle for context to be destroyed. + */ + __u32 handle; + + /** @_padding_4: Reserved. This field must be zeroed. */ + __u32 _padding_4; +}; + +/** + * DOC: PowerVR IOCTL CREATE_FREE_LIST and DESTROY_FREE_LIST interfaces + */ + +/** + * struct drm_pvr_ioctl_create_free_list_args - Arguments for + * %DRM_IOCTL_PVR_CREATE_FREE_LIST + * + * Free list arguments have the following constraints : + * + * - @max_num_pages must be greater than zero. + * - @grow_threshold must be between 0 and 100. + * - @grow_num_pages must be less than or equal to &max_num_pages. + * - @initial_num_pages, @max_num_pages and @grow_num_pages must be multiples + * of 4. + * - When &grow_num_pages is 0, @initial_num_pages must be equal to + * @max_num_pages. + * - When &grow_num_pages is non-zero, @initial_num_pages must be less than + * @max_num_pages. + */ +struct drm_pvr_ioctl_create_free_list_args { + /** + * @free_list_gpu_addr: [IN] Address of GPU mapping of buffer object + * containing memory to be used by free list. + * + * The mapped region of the buffer object must be at least + * @max_num_pages * ``sizeof(__u32)``. + * + * The buffer object must have been created with + * %DRM_PVR_BO_DEVICE_PM_FW_PROTECT set and + * %DRM_PVR_BO_CPU_ALLOW_USERSPACE_ACCESS not set. + */ + __u64 free_list_gpu_addr; + + /** @initial_num_pages: [IN] Pages initially allocated to free list. */ + __u32 initial_num_pages; + + /** @max_num_pages: [IN] Maximum number of pages in free list. */ + __u32 max_num_pages; + + /** @grow_num_pages: [IN] Pages to grow free list by per request. */ + __u32 grow_num_pages; + + /** + * @grow_threshold: [IN] Percentage of FL memory used that should + * trigger a new grow request. + */ + __u32 grow_threshold; + + /** + * @vm_context_handle: [IN] Handle for VM context that the free list + * buffer object is mapped in. + */ + __u32 vm_context_handle; + + /** + * @handle: [OUT] Handle for created free list. + */ + __u32 handle; +}; + +/** + * struct drm_pvr_ioctl_destroy_free_list_args - Arguments for + * %DRM_IOCTL_PVR_DESTROY_FREE_LIST + */ +struct drm_pvr_ioctl_destroy_free_list_args { + /** + * @handle: [IN] Handle for free list to be destroyed. + */ + __u32 handle; + + /** @_padding_4: Reserved. This field must be zeroed. */ + __u32 _padding_4; +}; + +/** + * DOC: PowerVR IOCTL CREATE_HWRT_DATASET and DESTROY_HWRT_DATASET interfaces + */ + +/** + * struct drm_pvr_create_hwrt_geom_data_args - Geometry data arguments used for + * &struct drm_pvr_ioctl_create_hwrt_dataset_args.geom_data_args. + */ +struct drm_pvr_create_hwrt_geom_data_args { + /** @tpc_dev_addr: [IN] Tail pointer cache GPU virtual address. */ + __u64 tpc_dev_addr; + + /** @tpc_size: [IN] Size of TPC, in bytes. */ + __u32 tpc_size; + + /** @tpc_stride: [IN] Stride between layers in TPC, in pages */ + __u32 tpc_stride; + + /** @vheap_table_dev_addr: [IN] VHEAP table GPU virtual address. */ + __u64 vheap_table_dev_addr; + + /** @rtc_dev_addr: [IN] Render Target Cache virtual address. */ + __u64 rtc_dev_addr; +}; + +/** + * struct drm_pvr_create_hwrt_rt_data_args - Render target arguments used for + * &struct drm_pvr_ioctl_create_hwrt_dataset_args.rt_data_args. + */ +struct drm_pvr_create_hwrt_rt_data_args { + /** @pm_mlist_dev_addr: [IN] PM MLIST GPU virtual address. */ + __u64 pm_mlist_dev_addr; + + /** @macrotile_array_dev_addr: [IN] Macrotile array GPU virtual address. + */ + __u64 macrotile_array_dev_addr; + + /** @region_header_dev_addr: [IN] Region header array GPU virtual + * address. */ + __u64 region_header_dev_addr; +}; + +#define PVR_DRM_HWRT_FREE_LIST_LOCAL 0 +#define PVR_DRM_HWRT_FREE_LIST_GLOBAL 1U + +/** + * struct drm_pvr_ioctl_create_hwrt_dataset_args - Arguments for + * %DRM_IOCTL_PVR_CREATE_HWRT_DATASET + */ +struct drm_pvr_ioctl_create_hwrt_dataset_args { + /** @geom_data_args: [IN] Geometry data arguments. */ + struct drm_pvr_create_hwrt_geom_data_args geom_data_args; + + /** + * @rt_data_args: [IN] Array of render target arguments. + * + * Each entry in this array represents a render target in a double + * buffered setup. + */ + struct drm_pvr_create_hwrt_rt_data_args rt_data_args[2]; + + /** + * @free_list_handles: [IN] Array of free list handles. + * + * free_list_handles[PVR_DRM_HWRT_FREE_LIST_LOCAL] must have initial + * size of at least that reported by + * &drm_pvr_dev_query_runtime_info.free_list_min_pages. + */ + __u32 free_list_handles[2]; + + /** @width: [IN] Width in pixels. */ + __u32 width; + + /** @height: [IN] Height in pixels. */ + __u32 height; + + /** @samples: [IN] Number of samples. */ + __u32 samples; + + /** @layers: [IN] Number of layers. */ + __u32 layers; + + /** @isp_merge_lower_x: [IN] Lower X coefficient for triangle merging. + */ + __u32 isp_merge_lower_x; + + /** @isp_merge_lower_y: [IN] Lower Y coefficient for triangle merging. + */ + __u32 isp_merge_lower_y; + + /** @isp_merge_scale_x: [IN] Scale X coefficient for triangle merging. + */ + __u32 isp_merge_scale_x; + + /** @isp_merge_scale_y: [IN] Scale Y coefficient for triangle merging. + */ + __u32 isp_merge_scale_y; + + /** @isp_merge_upper_x: [IN] Upper X coefficient for triangle merging. + */ + __u32 isp_merge_upper_x; + + /** @isp_merge_upper_y: [IN] Upper Y coefficient for triangle merging. + */ + __u32 isp_merge_upper_y; + + /** + * @region_header_size: [IN] Size of region header array. This common + * field is used by both render targets in this data set. + * + * The units for this field differ depending on what version of the + * simple internal parameter format the device uses. If format 2 is in + * use then this is interpreted as the number of region headers. For + * other formats it is interpreted as the size in dwords. + */ + __u32 region_header_size; + + /** + * @handle: [OUT] Handle for created HWRT dataset. + */ + __u32 handle; +}; + +/** + * struct drm_pvr_ioctl_destroy_hwrt_dataset_args - Arguments for + * %DRM_IOCTL_PVR_DESTROY_HWRT_DATASET + */ +struct drm_pvr_ioctl_destroy_hwrt_dataset_args { + /** + * @handle: [IN] Handle for HWRT dataset to be destroyed. + */ + __u32 handle; + + /** @_padding_4: Reserved. This field must be zeroed. */ + __u32 _padding_4; +}; + +/** + * DOC: PowerVR IOCTL SUBMIT_JOBS interface + */ + +/** + * DOC: Flags for the drm_pvr_sync_op object. + * + * .. c:macro:: DRM_PVR_SYNC_OP_HANDLE_TYPE_MASK + * + * Handle type mask for the drm_pvr_sync_op::flags field. + * + * .. c:macro:: DRM_PVR_SYNC_OP_FLAG_HANDLE_TYPE_SYNCOBJ + * + * Indicates the handle passed in drm_pvr_sync_op::handle is a syncobj + * handle. This is the default type. + * + * .. c:macro:: DRM_PVR_SYNC_OP_FLAG_HANDLE_TYPE_TIMELINE_SYNCOBJ + * + * Indicates the handle passed in drm_pvr_sync_op::handle is a timeline + * syncobj handle. + * + * .. c:macro:: DRM_PVR_SYNC_OP_FLAG_SIGNAL + * + * Signal operation requested. The out-fence bound to the job will be + * attached to the syncobj whose handle is passed in drm_pvr_sync_op::handle. + * + * .. c:macro:: DRM_PVR_SYNC_OP_FLAG_WAIT + * + * Wait operation requested. The job will wait for this particular syncobj or + * syncobj point to be signaled before being started. This is the default + * operation. + */ +#define DRM_PVR_SYNC_OP_FLAG_HANDLE_TYPE_MASK 0xf +#define DRM_PVR_SYNC_OP_FLAG_HANDLE_TYPE_SYNCOBJ 0 +#define DRM_PVR_SYNC_OP_FLAG_HANDLE_TYPE_TIMELINE_SYNCOBJ 1 +#define DRM_PVR_SYNC_OP_FLAG_SIGNAL _BITULL(31) +#define DRM_PVR_SYNC_OP_FLAG_WAIT 0 + +#define DRM_PVR_SYNC_OP_FLAGS_MASK \ + (DRM_PVR_SYNC_OP_FLAG_HANDLE_TYPE_MASK | DRM_PVR_SYNC_OP_FLAG_SIGNAL) + +/** + * struct drm_pvr_sync_op - Object describing a sync operation + */ +struct drm_pvr_sync_op { + /** @handle: Handle of sync object. */ + __u32 handle; + + /** @flags: Combination of ``DRM_PVR_SYNC_OP_FLAG_`` flags. */ + __u32 flags; + + /** @value: Timeline value for this drm_syncobj. MBZ for a binary + * syncobj. */ + __u64 value; +}; + +/** + * DOC: Flags for SUBMIT_JOB ioctl geometry command. + * + * .. c:macro:: DRM_PVR_SUBMIT_JOB_GEOM_CMD_FIRST + * + * Indicates if this the first command to be issued for a render. + * + * .. c:macro:: DRM_PVR_SUBMIT_JOB_GEOM_CMD_LAST + * + * Indicates if this the last command to be issued for a render. + * + * .. c:macro:: DRM_PVR_SUBMIT_JOB_GEOM_CMD_SINGLE_CORE + * + * Forces to use single core in a multi core device. + * + * .. c:macro:: DRM_PVR_SUBMIT_JOB_GEOM_CMD_FLAGS_MASK + * + * Logical OR of all the geometry cmd flags. + */ +#define DRM_PVR_SUBMIT_JOB_GEOM_CMD_FIRST _BITULL(0) +#define DRM_PVR_SUBMIT_JOB_GEOM_CMD_LAST _BITULL(1) +#define DRM_PVR_SUBMIT_JOB_GEOM_CMD_SINGLE_CORE _BITULL(2) +#define DRM_PVR_SUBMIT_JOB_GEOM_CMD_FLAGS_MASK \ + (DRM_PVR_SUBMIT_JOB_GEOM_CMD_FIRST | \ + DRM_PVR_SUBMIT_JOB_GEOM_CMD_LAST | \ + DRM_PVR_SUBMIT_JOB_GEOM_CMD_SINGLE_CORE) + +/** + * DOC: Flags for SUBMIT_JOB ioctl fragment command. + * + * .. c:macro:: DRM_PVR_SUBMIT_JOB_FRAG_CMD_SINGLE_CORE + * + * Use single core in a multi core setup. + * + * .. c:macro:: DRM_PVR_SUBMIT_JOB_FRAG_CMD_DEPTHBUFFER + * + * Indicates whether a depth buffer is present. + * + * .. c:macro:: DRM_PVR_SUBMIT_JOB_FRAG_CMD_STENCILBUFFER + * + * Indicates whether a stencil buffer is present. + * + * .. c:macro:: DRM_PVR_SUBMIT_JOB_FRAG_CMD_PREVENT_CDM_OVERLAP + * + * Disallow compute overlapped with this render. + * + * .. c:macro:: DRM_PVR_SUBMIT_JOB_FRAG_CMD_GET_VIS_RESULTS + * + * Indicates whether this render produces visibility results. + * + * .. c:macro:: DRM_PVR_SUBMIT_JOB_FRAG_CMD_SCRATCHBUFFER + * + * Indicates whether partial renders write to a scratch buffer instead of + * the final surface. It also forces the full screen copy expected to be + * present on the last render after all partial renders have completed. + * + * .. c:macro:: DRM_PVR_SUBMIT_JOB_FRAG_CMD_DISABLE_PIXELMERGE + * + * Disable pixel merging for this render. + * + * .. c:macro:: DRM_PVR_SUBMIT_JOB_FRAG_CMD_FLAGS_MASK + * + * Logical OR of all the fragment cmd flags. + */ +#define DRM_PVR_SUBMIT_JOB_FRAG_CMD_SINGLE_CORE _BITULL(0) +#define DRM_PVR_SUBMIT_JOB_FRAG_CMD_DEPTHBUFFER _BITULL(1) +#define DRM_PVR_SUBMIT_JOB_FRAG_CMD_STENCILBUFFER _BITULL(2) +#define DRM_PVR_SUBMIT_JOB_FRAG_CMD_PREVENT_CDM_OVERLAP _BITULL(3) +#define DRM_PVR_SUBMIT_JOB_FRAG_CMD_SCRATCHBUFFER _BITULL(4) +#define DRM_PVR_SUBMIT_JOB_FRAG_CMD_GET_VIS_RESULTS _BITULL(5) +#define DRM_PVR_SUBMIT_JOB_FRAG_CMD_PARTIAL_RENDER _BITULL(6) +#define DRM_PVR_SUBMIT_JOB_FRAG_CMD_DISABLE_PIXELMERGE _BITULL(7) +#define DRM_PVR_SUBMIT_JOB_FRAG_CMD_FLAGS_MASK \ + (DRM_PVR_SUBMIT_JOB_FRAG_CMD_SINGLE_CORE | \ + DRM_PVR_SUBMIT_JOB_FRAG_CMD_DEPTHBUFFER | \ + DRM_PVR_SUBMIT_JOB_FRAG_CMD_STENCILBUFFER | \ + DRM_PVR_SUBMIT_JOB_FRAG_CMD_PREVENT_CDM_OVERLAP | \ + DRM_PVR_SUBMIT_JOB_FRAG_CMD_SCRATCHBUFFER | \ + DRM_PVR_SUBMIT_JOB_FRAG_CMD_GET_VIS_RESULTS | \ + DRM_PVR_SUBMIT_JOB_FRAG_CMD_PARTIAL_RENDER | \ + DRM_PVR_SUBMIT_JOB_FRAG_CMD_DISABLE_PIXELMERGE) + +/** + * DOC: Flags for SUBMIT_JOB ioctl compute command. + * + * .. c:macro:: DRM_PVR_SUBMIT_JOB_COMPUTE_CMD_PREVENT_ALL_OVERLAP + * + * Disallow other jobs overlapped with this compute. + * + * .. c:macro:: DRM_PVR_SUBMIT_JOB_COMPUTE_CMD_SINGLE_CORE + * + * Forces to use single core in a multi core device. + * + * .. c:macro:: DRM_PVR_SUBMIT_JOB_COMPUTE_CMD_FLAGS_MASK + * + * Logical OR of all the compute cmd flags. + */ +#define DRM_PVR_SUBMIT_JOB_COMPUTE_CMD_PREVENT_ALL_OVERLAP _BITULL(0) +#define DRM_PVR_SUBMIT_JOB_COMPUTE_CMD_SINGLE_CORE _BITULL(1) +#define DRM_PVR_SUBMIT_JOB_COMPUTE_CMD_FLAGS_MASK \ + (DRM_PVR_SUBMIT_JOB_COMPUTE_CMD_PREVENT_ALL_OVERLAP | \ + DRM_PVR_SUBMIT_JOB_COMPUTE_CMD_SINGLE_CORE) + +/** + * DOC: Flags for SUBMIT_JOB ioctl transfer command. + * + * .. c:macro:: DRM_PVR_SUBMIT_JOB_TRANSFER_CMD_SINGLE_CORE + * + * Forces job to use a single core in a multi core device. + * + * .. c:macro:: DRM_PVR_SUBMIT_JOB_TRANSFER_CMD_FLAGS_MASK + * + * Logical OR of all the transfer cmd flags. + */ +#define DRM_PVR_SUBMIT_JOB_TRANSFER_CMD_SINGLE_CORE _BITULL(0) + +#define DRM_PVR_SUBMIT_JOB_TRANSFER_CMD_FLAGS_MASK \ + DRM_PVR_SUBMIT_JOB_TRANSFER_CMD_SINGLE_CORE + +/** + * enum drm_pvr_job_type - Arguments for &struct drm_pvr_job.job_type + */ +enum drm_pvr_job_type { + /** @DRM_PVR_JOB_TYPE_GEOMETRY: Job type is geometry. */ + DRM_PVR_JOB_TYPE_GEOMETRY = 0, + + /** @DRM_PVR_JOB_TYPE_FRAGMENT: Job type is fragment. */ + DRM_PVR_JOB_TYPE_FRAGMENT, + + /** @DRM_PVR_JOB_TYPE_COMPUTE: Job type is compute. */ + DRM_PVR_JOB_TYPE_COMPUTE, + + /** @DRM_PVR_JOB_TYPE_TRANSFER_FRAG: Job type is a fragment transfer. */ + DRM_PVR_JOB_TYPE_TRANSFER_FRAG, +}; + +/** + * struct drm_pvr_hwrt_data_ref - Reference HWRT data + */ +struct drm_pvr_hwrt_data_ref { + /** @set_handle: HWRT data set handle. */ + __u32 set_handle; + + /** @data_index: Index of the HWRT data inside the data set. */ + __u32 data_index; +}; + +/** + * struct drm_pvr_job - Job arguments passed to the %DRM_IOCTL_PVR_SUBMIT_JOBS + * ioctl + */ +struct drm_pvr_job { + /** + * @type: [IN] Type of job being submitted + * + * This must be one of the values defined by &enum drm_pvr_job_type. + */ + __u32 type; + + /** + * @context_handle: [IN] Context handle. + * + * When @job_type is %DRM_PVR_JOB_TYPE_RENDER, %DRM_PVR_JOB_TYPE_COMPUTE + * or %DRM_PVR_JOB_TYPE_TRANSFER_FRAG, this must be a valid handle + * returned by %DRM_IOCTL_PVR_CREATE_CONTEXT. The type of context must + * be compatible with the type of job being submitted. + * + * When @job_type is %DRM_PVR_JOB_TYPE_NULL, this must be zero. + */ + __u32 context_handle; + + /** + * @flags: [IN] Flags for command. + * + * Those are job-dependent. See all ``DRM_PVR_SUBMIT_JOB_*``. + */ + __u32 flags; + + /** + * @cmd_stream_len: [IN] Length of command stream, in bytes. + */ + __u32 cmd_stream_len; + + /** + * @cmd_stream: [IN] Pointer to command stream for command. + * + * The command stream must be u64-aligned. + */ + __u64 cmd_stream; + + /** @sync_ops: [IN] Fragment sync operations. */ + struct drm_pvr_obj_array sync_ops; + + /** + * @hwrt: [IN] HWRT data used by render jobs (geometry or fragment). + * + * Must be zero for non-render jobs. + */ + struct drm_pvr_hwrt_data_ref hwrt; +}; + +/** + * struct drm_pvr_ioctl_submit_jobs_args - Arguments for + * %DRM_IOCTL_PVR_SUBMIT_JOB + * + * If the syscall returns an error it is important to check the value of + * @jobs.count. This indicates the index into @jobs.array where the + * error occurred. + */ +struct drm_pvr_ioctl_submit_jobs_args { + /** @jobs: [IN] Array of jobs to submit. */ + struct drm_pvr_obj_array jobs; +}; + +#if defined(__cplusplus) +} +#endif + +#endif /* PVR_DRM_UAPI_H */ diff --git a/include/arch/x86_64/drm/qaic_accel.h b/include/arch/x86_64/drm/qaic_accel.h new file mode 100644 index 00000000..b6497637 --- /dev/null +++ b/include/arch/x86_64/drm/qaic_accel.h @@ -0,0 +1,412 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + * + * Copyright (c) 2019-2020, The Linux Foundation. All rights reserved. + * Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights reserved. + */ + +#ifndef QAIC_ACCEL_H_ +#define QAIC_ACCEL_H_ + +#include "drm.h" + +#if defined(__cplusplus) +extern "C" { +#endif + +/* The length(4K) includes len and count fields of qaic_manage_msg */ +#define QAIC_MANAGE_MAX_MSG_LENGTH SZ_4K + +/* semaphore flags */ +#define QAIC_SEM_INSYNCFENCE 2 +#define QAIC_SEM_OUTSYNCFENCE 1 + +/* Semaphore commands */ +#define QAIC_SEM_NOP 0 +#define QAIC_SEM_INIT 1 +#define QAIC_SEM_INC 2 +#define QAIC_SEM_DEC 3 +#define QAIC_SEM_WAIT_EQUAL 4 +#define QAIC_SEM_WAIT_GT_EQ 5 /* Greater than or equal */ +#define QAIC_SEM_WAIT_GT_0 6 /* Greater than 0 */ + +#define QAIC_TRANS_UNDEFINED 0 +#define QAIC_TRANS_PASSTHROUGH_FROM_USR 1 +#define QAIC_TRANS_PASSTHROUGH_TO_USR 2 +#define QAIC_TRANS_PASSTHROUGH_FROM_DEV 3 +#define QAIC_TRANS_PASSTHROUGH_TO_DEV 4 +#define QAIC_TRANS_DMA_XFER_FROM_USR 5 +#define QAIC_TRANS_DMA_XFER_TO_DEV 6 +#define QAIC_TRANS_ACTIVATE_FROM_USR 7 +#define QAIC_TRANS_ACTIVATE_FROM_DEV 8 +#define QAIC_TRANS_ACTIVATE_TO_DEV 9 +#define QAIC_TRANS_DEACTIVATE_FROM_USR 10 +#define QAIC_TRANS_DEACTIVATE_FROM_DEV 11 +#define QAIC_TRANS_STATUS_FROM_USR 12 +#define QAIC_TRANS_STATUS_TO_USR 13 +#define QAIC_TRANS_STATUS_FROM_DEV 14 +#define QAIC_TRANS_STATUS_TO_DEV 15 +#define QAIC_TRANS_TERMINATE_FROM_DEV 16 +#define QAIC_TRANS_TERMINATE_TO_DEV 17 +#define QAIC_TRANS_DMA_XFER_CONT 18 +#define QAIC_TRANS_VALIDATE_PARTITION_FROM_DEV 19 +#define QAIC_TRANS_VALIDATE_PARTITION_TO_DEV 20 + +/** + * struct qaic_manage_trans_hdr - Header for a transaction in a manage message. + * @type: In. Identifies this transaction. See QAIC_TRANS_* defines. + * @len: In. Length of this transaction, including this header. + */ +struct qaic_manage_trans_hdr { + __u32 type; + __u32 len; +}; + +/** + * struct qaic_manage_trans_passthrough - Defines a passthrough transaction. + * @hdr: In. Header to identify this transaction. + * @data: In. Payload of this transaction. Opaque to the driver. Userspace must + * encode in little endian and align/pad to 64-bit. + */ +struct qaic_manage_trans_passthrough { + struct qaic_manage_trans_hdr hdr; + __u8 data[]; +}; + +/** + * struct qaic_manage_trans_dma_xfer - Defines a DMA transfer transaction. + * @hdr: In. Header to identify this transaction. + * @tag: In. Identified this transfer in other transactions. Opaque to the + * driver. + * @pad: Structure padding. + * @addr: In. Address of the data to DMA to the device. + * @size: In. Length of the data to DMA to the device. + */ +struct qaic_manage_trans_dma_xfer { + struct qaic_manage_trans_hdr hdr; + __u32 tag; + __u32 pad; + __u64 addr; + __u64 size; +}; + +/** + * struct qaic_manage_trans_activate_to_dev - Defines an activate request. + * @hdr: In. Header to identify this transaction. + * @queue_size: In. Number of elements for DBC request and response queues. + * @eventfd: Unused. + * @options: In. Device specific options for this activate. + * @pad: Structure padding. Must be 0. + */ +struct qaic_manage_trans_activate_to_dev { + struct qaic_manage_trans_hdr hdr; + __u32 queue_size; + __u32 eventfd; + __u32 options; + __u32 pad; +}; + +/** + * struct qaic_manage_trans_activate_from_dev - Defines an activate response. + * @hdr: Out. Header to identify this transaction. + * @status: Out. Return code of the request from the device. + * @dbc_id: Out. Id of the assigned DBC for successful request. + * @options: Out. Device specific options for this activate. + */ +struct qaic_manage_trans_activate_from_dev { + struct qaic_manage_trans_hdr hdr; + __u32 status; + __u32 dbc_id; + __u64 options; +}; + +/** + * struct qaic_manage_trans_deactivate - Defines a deactivate request. + * @hdr: In. Header to identify this transaction. + * @dbc_id: In. Id of assigned DBC. + * @pad: Structure padding. Must be 0. + */ +struct qaic_manage_trans_deactivate { + struct qaic_manage_trans_hdr hdr; + __u32 dbc_id; + __u32 pad; +}; + +/** + * struct qaic_manage_trans_status_to_dev - Defines a status request. + * @hdr: In. Header to identify this transaction. + */ +struct qaic_manage_trans_status_to_dev { + struct qaic_manage_trans_hdr hdr; +}; + +/** + * struct qaic_manage_trans_status_from_dev - Defines a status response. + * @hdr: Out. Header to identify this transaction. + * @major: Out. NNC protocol version major number. + * @minor: Out. NNC protocol version minor number. + * @status: Out. Return code from device. + * @status_flags: Out. Flags from device. Bit 0 indicates if CRCs are required. + */ +struct qaic_manage_trans_status_from_dev { + struct qaic_manage_trans_hdr hdr; + __u16 major; + __u16 minor; + __u32 status; + __u64 status_flags; +}; + +/** + * struct qaic_manage_msg - Defines a message to the device. + * @len: In. Length of all the transactions contained within this message. + * @count: In. Number of transactions in this message. + * @data: In. Address to an array where the transactions can be found. + */ +struct qaic_manage_msg { + __u32 len; + __u32 count; + __u64 data; +}; + +/** + * struct qaic_create_bo - Defines a request to create a buffer object. + * @size: In. Size of the buffer in bytes. + * @handle: Out. GEM handle for the BO. + * @pad: Structure padding. Must be 0. + */ +struct qaic_create_bo { + __u64 size; + __u32 handle; + __u32 pad; +}; + +/** + * struct qaic_mmap_bo - Defines a request to prepare a BO for mmap(). + * @handle: In. Handle of the GEM BO to prepare for mmap(). + * @pad: Structure padding. Must be 0. + * @offset: Out. Offset value to provide to mmap(). + */ +struct qaic_mmap_bo { + __u32 handle; + __u32 pad; + __u64 offset; +}; + +/** + * struct qaic_sem - Defines a semaphore command for a BO slice. + * @val: In. Only lower 12 bits are valid. + * @index: In. Only lower 5 bits are valid. + * @presync: In. 1 if presync operation, 0 if postsync. + * @cmd: In. One of QAIC_SEM_*. + * @flags: In. Bitfield. See QAIC_SEM_INSYNCFENCE and QAIC_SEM_OUTSYNCFENCE + * @pad: Structure padding. Must be 0. + */ +struct qaic_sem { + __u16 val; + __u8 index; + __u8 presync; + __u8 cmd; + __u8 flags; + __u16 pad; +}; + +/** + * struct qaic_attach_slice_entry - Defines a single BO slice. + * @size: In. Size of this slice in bytes. + * @sem0: In. Semaphore command 0. Must be 0 is not valid. + * @sem1: In. Semaphore command 1. Must be 0 is not valid. + * @sem2: In. Semaphore command 2. Must be 0 is not valid. + * @sem3: In. Semaphore command 3. Must be 0 is not valid. + * @dev_addr: In. Device address this slice pushes to or pulls from. + * @db_addr: In. Address of the doorbell to ring. + * @db_data: In. Data to write to the doorbell. + * @db_len: In. Size of the doorbell data in bits - 32, 16, or 8. 0 is for + * inactive doorbells. + * @offset: In. Start of this slice as an offset from the start of the BO. + */ +struct qaic_attach_slice_entry { + __u64 size; + struct qaic_sem sem0; + struct qaic_sem sem1; + struct qaic_sem sem2; + struct qaic_sem sem3; + __u64 dev_addr; + __u64 db_addr; + __u32 db_data; + __u32 db_len; + __u64 offset; +}; + +/** + * struct qaic_attach_slice_hdr - Defines metadata for a set of BO slices. + * @count: In. Number of slices for this BO. + * @dbc_id: In. Associate the sliced BO with this DBC. + * @handle: In. GEM handle of the BO to slice. + * @dir: In. Direction of data flow. 1 = DMA_TO_DEVICE, 2 = DMA_FROM_DEVICE + * @size: Deprecated. This value is ignored and size of @handle is used instead. + */ +struct qaic_attach_slice_hdr { + __u32 count; + __u32 dbc_id; + __u32 handle; + __u32 dir; + __u64 size; +}; + +/** + * struct qaic_attach_slice - Defines a set of BO slices. + * @hdr: In. Metadata of the set of slices. + * @data: In. Pointer to an array containing the slice definitions. + */ +struct qaic_attach_slice { + struct qaic_attach_slice_hdr hdr; + __u64 data; +}; + +/** + * struct qaic_execute_entry - Defines a BO to submit to the device. + * @handle: In. GEM handle of the BO to commit to the device. + * @dir: In. Direction of data. 1 = to device, 2 = from device. + */ +struct qaic_execute_entry { + __u32 handle; + __u32 dir; +}; + +/** + * struct qaic_partial_execute_entry - Defines a BO to resize and submit. + * @handle: In. GEM handle of the BO to commit to the device. + * @dir: In. Direction of data. 1 = to device, 2 = from device. + * @resize: In. New size of the BO. Must be <= the original BO size. + * @resize as 0 would be interpreted as no DMA transfer is + * involved. + */ +struct qaic_partial_execute_entry { + __u32 handle; + __u32 dir; + __u64 resize; +}; + +/** + * struct qaic_execute_hdr - Defines metadata for BO submission. + * @count: In. Number of BOs to submit. + * @dbc_id: In. DBC to submit the BOs on. + */ +struct qaic_execute_hdr { + __u32 count; + __u32 dbc_id; +}; + +/** + * struct qaic_execute - Defines a list of BOs to submit to the device. + * @hdr: In. BO list metadata. + * @data: In. Pointer to an array of BOs to submit. + */ +struct qaic_execute { + struct qaic_execute_hdr hdr; + __u64 data; +}; + +/** + * struct qaic_wait - Defines a blocking wait for BO execution. + * @handle: In. GEM handle of the BO to wait on. + * @timeout: In. Maximum time in ms to wait for the BO. + * @dbc_id: In. DBC the BO is submitted to. + * @pad: Structure padding. Must be 0. + */ +struct qaic_wait { + __u32 handle; + __u32 timeout; + __u32 dbc_id; + __u32 pad; +}; + +/** + * struct qaic_perf_stats_hdr - Defines metadata for getting BO perf info. + * @count: In. Number of BOs requested. + * @pad: Structure padding. Must be 0. + * @dbc_id: In. DBC the BO are associated with. + */ +struct qaic_perf_stats_hdr { + __u16 count; + __u16 pad; + __u32 dbc_id; +}; + +/** + * struct qaic_perf_stats - Defines a request for getting BO perf info. + * @hdr: In. Request metadata + * @data: In. Pointer to array of stats structures that will receive the data. + */ +struct qaic_perf_stats { + struct qaic_perf_stats_hdr hdr; + __u64 data; +}; + +/** + * struct qaic_perf_stats_entry - Defines a BO perf info. + * @handle: In. GEM handle of the BO to get perf stats for. + * @queue_level_before: Out. Number of elements in the queue before this BO + * was submitted. + * @num_queue_element: Out. Number of elements added to the queue to submit + * this BO. + * @submit_latency_us: Out. Time taken by the driver to submit this BO. + * @device_latency_us: Out. Time taken by the device to execute this BO. + * @pad: Structure padding. Must be 0. + */ +struct qaic_perf_stats_entry { + __u32 handle; + __u32 queue_level_before; + __u32 num_queue_element; + __u32 submit_latency_us; + __u32 device_latency_us; + __u32 pad; +}; + +/** + * struct qaic_detach_slice - Detaches slicing configuration from BO. + * @handle: In. GEM handle of the BO to detach slicing configuration. + * @pad: Structure padding. Must be 0. + */ +struct qaic_detach_slice { + __u32 handle; + __u32 pad; +}; + +#define DRM_QAIC_MANAGE 0x00 +#define DRM_QAIC_CREATE_BO 0x01 +#define DRM_QAIC_MMAP_BO 0x02 +#define DRM_QAIC_ATTACH_SLICE_BO 0x03 +#define DRM_QAIC_EXECUTE_BO 0x04 +#define DRM_QAIC_PARTIAL_EXECUTE_BO 0x05 +#define DRM_QAIC_WAIT_BO 0x06 +#define DRM_QAIC_PERF_STATS_BO 0x07 +#define DRM_QAIC_DETACH_SLICE_BO 0x08 + +#define DRM_IOCTL_QAIC_MANAGE \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_QAIC_MANAGE, struct qaic_manage_msg) +#define DRM_IOCTL_QAIC_CREATE_BO \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_QAIC_CREATE_BO, struct qaic_create_bo) +#define DRM_IOCTL_QAIC_MMAP_BO \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_QAIC_MMAP_BO, struct qaic_mmap_bo) +#define DRM_IOCTL_QAIC_ATTACH_SLICE_BO \ + DRM_IOW(DRM_COMMAND_BASE + DRM_QAIC_ATTACH_SLICE_BO, \ + struct qaic_attach_slice) +#define DRM_IOCTL_QAIC_EXECUTE_BO \ + DRM_IOW(DRM_COMMAND_BASE + DRM_QAIC_EXECUTE_BO, struct qaic_execute) +#define DRM_IOCTL_QAIC_PARTIAL_EXECUTE_BO \ + DRM_IOW(DRM_COMMAND_BASE + DRM_QAIC_PARTIAL_EXECUTE_BO, \ + struct qaic_execute) +#define DRM_IOCTL_QAIC_WAIT_BO \ + DRM_IOW(DRM_COMMAND_BASE + DRM_QAIC_WAIT_BO, struct qaic_wait) +#define DRM_IOCTL_QAIC_PERF_STATS_BO \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_QAIC_PERF_STATS_BO, \ + struct qaic_perf_stats) +#define DRM_IOCTL_QAIC_DETACH_SLICE_BO \ + DRM_IOW(DRM_COMMAND_BASE + DRM_QAIC_DETACH_SLICE_BO, \ + struct qaic_detach_slice) + +#if defined(__cplusplus) +} +#endif + +#endif /* QAIC_ACCEL_H_ */ diff --git a/include/arch/x86_64/drm/qxl_drm.h b/include/arch/x86_64/drm/qxl_drm.h new file mode 100644 index 00000000..56bed67c --- /dev/null +++ b/include/arch/x86_64/drm/qxl_drm.h @@ -0,0 +1,156 @@ +/* + * Copyright 2013 Red Hat + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef QXL_DRM_H +#define QXL_DRM_H + +#include "drm.h" + +#if defined(__cplusplus) +extern "C" { +#endif + +/* Please note that modifications to all structs defined here are + * subject to backwards-compatibility constraints. + * + * Do not use pointers, use __u64 instead for 32 bit / 64 bit user/kernel + * compatibility Keep fields aligned to their size + */ + +#define QXL_GEM_DOMAIN_CPU 0 +#define QXL_GEM_DOMAIN_VRAM 1 +#define QXL_GEM_DOMAIN_SURFACE 2 + +#define DRM_QXL_ALLOC 0x00 +#define DRM_QXL_MAP 0x01 +#define DRM_QXL_EXECBUFFER 0x02 +#define DRM_QXL_UPDATE_AREA 0x03 +#define DRM_QXL_GETPARAM 0x04 +#define DRM_QXL_CLIENTCAP 0x05 + +#define DRM_QXL_ALLOC_SURF 0x06 + +struct drm_qxl_alloc { + __u32 size; + __u32 handle; /* 0 is an invalid handle */ +}; + +struct drm_qxl_map { + __u64 offset; /* use for mmap system call */ + __u32 handle; + __u32 pad; +}; + +/* + * dest is the bo we are writing the relocation into + * src is bo we are relocating. + * *(dest_handle.base_addr + dest_offset) = physical_address(src_handle.addr + + * src_offset) + */ +#define QXL_RELOC_TYPE_BO 1 +#define QXL_RELOC_TYPE_SURF 2 + +struct drm_qxl_reloc { + __u64 src_offset; /* offset into src_handle or src buffer */ + __u64 dst_offset; /* offset in dest handle */ + __u32 src_handle; /* dest handle to compute address from */ + __u32 dst_handle; /* 0 if to command buffer */ + __u32 reloc_type; + __u32 pad; +}; + +struct drm_qxl_command { + __u64 command; /* void* */ + __u64 relocs; /* struct drm_qxl_reloc* */ + __u32 type; + __u32 command_size; + __u32 relocs_num; + __u32 pad; +}; + +struct drm_qxl_execbuffer { + __u32 flags; /* for future use */ + __u32 commands_num; + __u64 commands; /* struct drm_qxl_command* */ +}; + +struct drm_qxl_update_area { + __u32 handle; + __u32 top; + __u32 left; + __u32 bottom; + __u32 right; + __u32 pad; +}; + +#define QXL_PARAM_NUM_SURFACES 1 /* rom->n_surfaces */ +#define QXL_PARAM_MAX_RELOCS 2 +struct drm_qxl_getparam { + __u64 param; + __u64 value; +}; + +/* these are one bit values */ +struct drm_qxl_clientcap { + __u32 index; + __u32 pad; +}; + +struct drm_qxl_alloc_surf { + __u32 format; + __u32 width; + __u32 height; + __s32 stride; + __u32 handle; + __u32 pad; +}; + +#define DRM_IOCTL_QXL_ALLOC \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_QXL_ALLOC, struct drm_qxl_alloc) + +#define DRM_IOCTL_QXL_MAP \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_QXL_MAP, struct drm_qxl_map) + +#define DRM_IOCTL_QXL_EXECBUFFER \ + DRM_IOW(DRM_COMMAND_BASE + DRM_QXL_EXECBUFFER, \ + struct drm_qxl_execbuffer) + +#define DRM_IOCTL_QXL_UPDATE_AREA \ + DRM_IOW(DRM_COMMAND_BASE + DRM_QXL_UPDATE_AREA, \ + struct drm_qxl_update_area) + +#define DRM_IOCTL_QXL_GETPARAM \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_QXL_GETPARAM, struct drm_qxl_getparam) + +#define DRM_IOCTL_QXL_CLIENTCAP \ + DRM_IOW(DRM_COMMAND_BASE + DRM_QXL_CLIENTCAP, struct drm_qxl_clientcap) + +#define DRM_IOCTL_QXL_ALLOC_SURF \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_QXL_ALLOC_SURF, \ + struct drm_qxl_alloc_surf) + +#if defined(__cplusplus) +} +#endif + +#endif diff --git a/include/arch/x86_64/drm/radeon_drm.h b/include/arch/x86_64/drm/radeon_drm.h new file mode 100644 index 00000000..825b8919 --- /dev/null +++ b/include/arch/x86_64/drm/radeon_drm.h @@ -0,0 +1,1135 @@ +/* radeon_drm.h -- Public header for the radeon driver -*- linux-c -*- + * + * Copyright 2000 Precision Insight, Inc., Cedar Park, Texas. + * Copyright 2000 VA Linux Systems, Inc., Fremont, California. + * Copyright 2002 Tungsten Graphics, Inc., Cedar Park, Texas. + * All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Kevin E. Martin <martin@valinux.com> + * Gareth Hughes <gareth@valinux.com> + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#ifndef __RADEON_DRM_H__ +#define __RADEON_DRM_H__ + +#include "drm.h" + +#if defined(__cplusplus) +extern "C" { +#endif + +/* WARNING: If you change any of these defines, make sure to change the + * defines in the X server file (radeon_sarea.h) + */ +#ifndef __RADEON_SAREA_DEFINES__ +#define __RADEON_SAREA_DEFINES__ + +/* Old style state flags, required for sarea interface (1.1 and 1.2 + * clears) and 1.2 drm_vertex2 ioctl. + */ +#define RADEON_UPLOAD_CONTEXT 0x00000001 +#define RADEON_UPLOAD_VERTFMT 0x00000002 +#define RADEON_UPLOAD_LINE 0x00000004 +#define RADEON_UPLOAD_BUMPMAP 0x00000008 +#define RADEON_UPLOAD_MASKS 0x00000010 +#define RADEON_UPLOAD_VIEWPORT 0x00000020 +#define RADEON_UPLOAD_SETUP 0x00000040 +#define RADEON_UPLOAD_TCL 0x00000080 +#define RADEON_UPLOAD_MISC 0x00000100 +#define RADEON_UPLOAD_TEX0 0x00000200 +#define RADEON_UPLOAD_TEX1 0x00000400 +#define RADEON_UPLOAD_TEX2 0x00000800 +#define RADEON_UPLOAD_TEX0IMAGES 0x00001000 +#define RADEON_UPLOAD_TEX1IMAGES 0x00002000 +#define RADEON_UPLOAD_TEX2IMAGES 0x00004000 +#define RADEON_UPLOAD_CLIPRECTS 0x00008000 /* handled client-side */ +#define RADEON_REQUIRE_QUIESCENCE 0x00010000 +#define RADEON_UPLOAD_ZBIAS 0x00020000 /* version 1.2 and newer */ +#define RADEON_UPLOAD_ALL 0x003effff +#define RADEON_UPLOAD_CONTEXT_ALL 0x003e01ff + +/* New style per-packet identifiers for use in cmd_buffer ioctl with + * the RADEON_EMIT_PACKET command. Comments relate new packets to old + * state bits and the packet size: + */ +#define RADEON_EMIT_PP_MISC 0 /* context/7 */ +#define RADEON_EMIT_PP_CNTL 1 /* context/3 */ +#define RADEON_EMIT_RB3D_COLORPITCH 2 /* context/1 */ +#define RADEON_EMIT_RE_LINE_PATTERN 3 /* line/2 */ +#define RADEON_EMIT_SE_LINE_WIDTH 4 /* line/1 */ +#define RADEON_EMIT_PP_LUM_MATRIX 5 /* bumpmap/1 */ +#define RADEON_EMIT_PP_ROT_MATRIX_0 6 /* bumpmap/2 */ +#define RADEON_EMIT_RB3D_STENCILREFMASK 7 /* masks/3 */ +#define RADEON_EMIT_SE_VPORT_XSCALE 8 /* viewport/6 */ +#define RADEON_EMIT_SE_CNTL 9 /* setup/2 */ +#define RADEON_EMIT_SE_CNTL_STATUS 10 /* setup/1 */ +#define RADEON_EMIT_RE_MISC 11 /* misc/1 */ +#define RADEON_EMIT_PP_TXFILTER_0 12 /* tex0/6 */ +#define RADEON_EMIT_PP_BORDER_COLOR_0 13 /* tex0/1 */ +#define RADEON_EMIT_PP_TXFILTER_1 14 /* tex1/6 */ +#define RADEON_EMIT_PP_BORDER_COLOR_1 15 /* tex1/1 */ +#define RADEON_EMIT_PP_TXFILTER_2 16 /* tex2/6 */ +#define RADEON_EMIT_PP_BORDER_COLOR_2 17 /* tex2/1 */ +#define RADEON_EMIT_SE_ZBIAS_FACTOR 18 /* zbias/2 */ +#define RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT 19 /* tcl/11 */ +#define RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED 20 /* material/17 */ +#define R200_EMIT_PP_TXCBLEND_0 21 /* tex0/4 */ +#define R200_EMIT_PP_TXCBLEND_1 22 /* tex1/4 */ +#define R200_EMIT_PP_TXCBLEND_2 23 /* tex2/4 */ +#define R200_EMIT_PP_TXCBLEND_3 24 /* tex3/4 */ +#define R200_EMIT_PP_TXCBLEND_4 25 /* tex4/4 */ +#define R200_EMIT_PP_TXCBLEND_5 26 /* tex5/4 */ +#define R200_EMIT_PP_TXCBLEND_6 27 /* /4 */ +#define R200_EMIT_PP_TXCBLEND_7 28 /* /4 */ +#define R200_EMIT_TCL_LIGHT_MODEL_CTL_0 29 /* tcl/7 */ +#define R200_EMIT_TFACTOR_0 30 /* tf/7 */ +#define R200_EMIT_VTX_FMT_0 31 /* vtx/5 */ +#define R200_EMIT_VAP_CTL 32 /* vap/1 */ +#define R200_EMIT_MATRIX_SELECT_0 33 /* msl/5 */ +#define R200_EMIT_TEX_PROC_CTL_2 34 /* tcg/5 */ +#define R200_EMIT_TCL_UCP_VERT_BLEND_CTL 35 /* tcl/1 */ +#define R200_EMIT_PP_TXFILTER_0 36 /* tex0/6 */ +#define R200_EMIT_PP_TXFILTER_1 37 /* tex1/6 */ +#define R200_EMIT_PP_TXFILTER_2 38 /* tex2/6 */ +#define R200_EMIT_PP_TXFILTER_3 39 /* tex3/6 */ +#define R200_EMIT_PP_TXFILTER_4 40 /* tex4/6 */ +#define R200_EMIT_PP_TXFILTER_5 41 /* tex5/6 */ +#define R200_EMIT_PP_TXOFFSET_0 42 /* tex0/1 */ +#define R200_EMIT_PP_TXOFFSET_1 43 /* tex1/1 */ +#define R200_EMIT_PP_TXOFFSET_2 44 /* tex2/1 */ +#define R200_EMIT_PP_TXOFFSET_3 45 /* tex3/1 */ +#define R200_EMIT_PP_TXOFFSET_4 46 /* tex4/1 */ +#define R200_EMIT_PP_TXOFFSET_5 47 /* tex5/1 */ +#define R200_EMIT_VTE_CNTL 48 /* vte/1 */ +#define R200_EMIT_OUTPUT_VTX_COMP_SEL 49 /* vtx/1 */ +#define R200_EMIT_PP_TAM_DEBUG3 50 /* tam/1 */ +#define R200_EMIT_PP_CNTL_X 51 /* cst/1 */ +#define R200_EMIT_RB3D_DEPTHXY_OFFSET 52 /* cst/1 */ +#define R200_EMIT_RE_AUX_SCISSOR_CNTL 53 /* cst/1 */ +#define R200_EMIT_RE_SCISSOR_TL_0 54 /* cst/2 */ +#define R200_EMIT_RE_SCISSOR_TL_1 55 /* cst/2 */ +#define R200_EMIT_RE_SCISSOR_TL_2 56 /* cst/2 */ +#define R200_EMIT_SE_VAP_CNTL_STATUS 57 /* cst/1 */ +#define R200_EMIT_SE_VTX_STATE_CNTL 58 /* cst/1 */ +#define R200_EMIT_RE_POINTSIZE 59 /* cst/1 */ +#define R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0 60 /* cst/4 */ +#define R200_EMIT_PP_CUBIC_FACES_0 61 +#define R200_EMIT_PP_CUBIC_OFFSETS_0 62 +#define R200_EMIT_PP_CUBIC_FACES_1 63 +#define R200_EMIT_PP_CUBIC_OFFSETS_1 64 +#define R200_EMIT_PP_CUBIC_FACES_2 65 +#define R200_EMIT_PP_CUBIC_OFFSETS_2 66 +#define R200_EMIT_PP_CUBIC_FACES_3 67 +#define R200_EMIT_PP_CUBIC_OFFSETS_3 68 +#define R200_EMIT_PP_CUBIC_FACES_4 69 +#define R200_EMIT_PP_CUBIC_OFFSETS_4 70 +#define R200_EMIT_PP_CUBIC_FACES_5 71 +#define R200_EMIT_PP_CUBIC_OFFSETS_5 72 +#define RADEON_EMIT_PP_TEX_SIZE_0 73 +#define RADEON_EMIT_PP_TEX_SIZE_1 74 +#define RADEON_EMIT_PP_TEX_SIZE_2 75 +#define R200_EMIT_RB3D_BLENDCOLOR 76 +#define R200_EMIT_TCL_POINT_SPRITE_CNTL 77 +#define RADEON_EMIT_PP_CUBIC_FACES_0 78 +#define RADEON_EMIT_PP_CUBIC_OFFSETS_T0 79 +#define RADEON_EMIT_PP_CUBIC_FACES_1 80 +#define RADEON_EMIT_PP_CUBIC_OFFSETS_T1 81 +#define RADEON_EMIT_PP_CUBIC_FACES_2 82 +#define RADEON_EMIT_PP_CUBIC_OFFSETS_T2 83 +#define R200_EMIT_PP_TRI_PERF_CNTL 84 +#define R200_EMIT_PP_AFS_0 85 +#define R200_EMIT_PP_AFS_1 86 +#define R200_EMIT_ATF_TFACTOR 87 +#define R200_EMIT_PP_TXCTLALL_0 88 +#define R200_EMIT_PP_TXCTLALL_1 89 +#define R200_EMIT_PP_TXCTLALL_2 90 +#define R200_EMIT_PP_TXCTLALL_3 91 +#define R200_EMIT_PP_TXCTLALL_4 92 +#define R200_EMIT_PP_TXCTLALL_5 93 +#define R200_EMIT_VAP_PVS_CNTL 94 +#define RADEON_MAX_STATE_PACKETS 95 + +/* Commands understood by cmd_buffer ioctl. More can be added but + * obviously these can't be removed or changed: + */ +#define RADEON_CMD_PACKET 1 /* emit one of the register packets above */ +#define RADEON_CMD_SCALARS 2 /* emit scalar data */ +#define RADEON_CMD_VECTORS 3 /* emit vector data */ +#define RADEON_CMD_DMA_DISCARD 4 /* discard current dma buf */ +#define RADEON_CMD_PACKET3 5 /* emit hw packet */ +#define RADEON_CMD_PACKET3_CLIP 6 /* emit hw packet wrapped in cliprects */ +#define RADEON_CMD_SCALARS2 7 /* r200 stopgap */ +#define RADEON_CMD_WAIT \ + 8 /* emit hw wait commands -- note: \ + * doesn't make the cpu wait, just \ + * the graphics hardware */ +#define RADEON_CMD_VECLINEAR 9 /* another r200 stopgap */ + +typedef union { + int i; + struct { + unsigned char cmd_type, pad0, pad1, pad2; + } header; + struct { + unsigned char cmd_type, packet_id, pad0, pad1; + } packet; + struct { + unsigned char cmd_type, offset, stride, count; + } scalars; + struct { + unsigned char cmd_type, offset, stride, count; + } vectors; + struct { + unsigned char cmd_type, addr_lo, addr_hi, count; + } veclinear; + struct { + unsigned char cmd_type, buf_idx, pad0, pad1; + } dma; + struct { + unsigned char cmd_type, flags, pad0, pad1; + } wait; +} drm_radeon_cmd_header_t; + +#define RADEON_WAIT_2D 0x1 +#define RADEON_WAIT_3D 0x2 + +/* Allowed parameters for R300_CMD_PACKET3 + */ +#define R300_CMD_PACKET3_CLEAR 0 +#define R300_CMD_PACKET3_RAW 1 + +/* Commands understood by cmd_buffer ioctl for R300. + * The interface has not been stabilized, so some of these may be removed + * and eventually reordered before stabilization. + */ +#define R300_CMD_PACKET0 1 +#define R300_CMD_VPU 2 /* emit vertex program upload */ +#define R300_CMD_PACKET3 3 /* emit a packet3 */ +#define R300_CMD_END3D 4 /* emit sequence ending 3d rendering */ +#define R300_CMD_CP_DELAY 5 +#define R300_CMD_DMA_DISCARD 6 +#define R300_CMD_WAIT 7 +#define R300_WAIT_2D 0x1 +#define R300_WAIT_3D 0x2 +/* these two defines are DOING IT WRONG - however + * we have userspace which relies on using these. + * The wait interface is backwards compat new + * code should use the NEW_WAIT defines below + * THESE ARE NOT BIT FIELDS + */ +#define R300_WAIT_2D_CLEAN 0x3 +#define R300_WAIT_3D_CLEAN 0x4 + +#define R300_NEW_WAIT_2D_3D 0x3 +#define R300_NEW_WAIT_2D_2D_CLEAN 0x4 +#define R300_NEW_WAIT_3D_3D_CLEAN 0x6 +#define R300_NEW_WAIT_2D_2D_CLEAN_3D_3D_CLEAN 0x8 + +#define R300_CMD_SCRATCH 8 +#define R300_CMD_R500FP 9 + +typedef union { + unsigned int u; + struct { + unsigned char cmd_type, pad0, pad1, pad2; + } header; + struct { + unsigned char cmd_type, count, reglo, reghi; + } packet0; + struct { + unsigned char cmd_type, count, adrlo, adrhi; + } vpu; + struct { + unsigned char cmd_type, packet, pad0, pad1; + } packet3; + struct { + unsigned char cmd_type, packet; + unsigned short count; /* amount of packet2 to emit */ + } delay; + struct { + unsigned char cmd_type, buf_idx, pad0, pad1; + } dma; + struct { + unsigned char cmd_type, flags, pad0, pad1; + } wait; + struct { + unsigned char cmd_type, reg, n_bufs, flags; + } scratch; + struct { + unsigned char cmd_type, count, adrlo, adrhi_flags; + } r500fp; +} drm_r300_cmd_header_t; + +#define RADEON_FRONT 0x1 +#define RADEON_BACK 0x2 +#define RADEON_DEPTH 0x4 +#define RADEON_STENCIL 0x8 +#define RADEON_CLEAR_FASTZ 0x80000000 +#define RADEON_USE_HIERZ 0x40000000 +#define RADEON_USE_COMP_ZBUF 0x20000000 + +#define R500FP_CONSTANT_TYPE (1 << 1) +#define R500FP_CONSTANT_CLAMP (1 << 2) + +/* Primitive types + */ +#define RADEON_POINTS 0x1 +#define RADEON_LINES 0x2 +#define RADEON_LINE_STRIP 0x3 +#define RADEON_TRIANGLES 0x4 +#define RADEON_TRIANGLE_FAN 0x5 +#define RADEON_TRIANGLE_STRIP 0x6 + +/* Vertex/indirect buffer size + */ +#define RADEON_BUFFER_SIZE 65536 + +/* Byte offsets for indirect buffer data + */ +#define RADEON_INDEX_PRIM_OFFSET 20 + +#define RADEON_SCRATCH_REG_OFFSET 32 + +#define R600_SCRATCH_REG_OFFSET 256 + +#define RADEON_NR_SAREA_CLIPRECTS 12 + +/* There are 2 heaps (local/GART). Each region within a heap is a + * minimum of 64k, and there are at most 64 of them per heap. + */ +#define RADEON_LOCAL_TEX_HEAP 0 +#define RADEON_GART_TEX_HEAP 1 +#define RADEON_NR_TEX_HEAPS 2 +#define RADEON_NR_TEX_REGIONS 64 +#define RADEON_LOG_TEX_GRANULARITY 16 + +#define RADEON_MAX_TEXTURE_LEVELS 12 +#define RADEON_MAX_TEXTURE_UNITS 3 + +#define RADEON_MAX_SURFACES 8 + +/* Blits have strict offset rules. All blit offset must be aligned on + * a 1K-byte boundary. + */ +#define RADEON_OFFSET_SHIFT 10 +#define RADEON_OFFSET_ALIGN (1 << RADEON_OFFSET_SHIFT) +#define RADEON_OFFSET_MASK (RADEON_OFFSET_ALIGN - 1) + +#endif /* __RADEON_SAREA_DEFINES__ */ + +typedef struct { + unsigned int red; + unsigned int green; + unsigned int blue; + unsigned int alpha; +} radeon_color_regs_t; + +typedef struct { + /* Context state */ + unsigned int pp_misc; /* 0x1c14 */ + unsigned int pp_fog_color; + unsigned int re_solid_color; + unsigned int rb3d_blendcntl; + unsigned int rb3d_depthoffset; + unsigned int rb3d_depthpitch; + unsigned int rb3d_zstencilcntl; + + unsigned int pp_cntl; /* 0x1c38 */ + unsigned int rb3d_cntl; + unsigned int rb3d_coloroffset; + unsigned int re_width_height; + unsigned int rb3d_colorpitch; + unsigned int se_cntl; + + /* Vertex format state */ + unsigned int se_coord_fmt; /* 0x1c50 */ + + /* Line state */ + unsigned int re_line_pattern; /* 0x1cd0 */ + unsigned int re_line_state; + + unsigned int se_line_width; /* 0x1db8 */ + + /* Bumpmap state */ + unsigned int pp_lum_matrix; /* 0x1d00 */ + + unsigned int pp_rot_matrix_0; /* 0x1d58 */ + unsigned int pp_rot_matrix_1; + + /* Mask state */ + unsigned int rb3d_stencilrefmask; /* 0x1d7c */ + unsigned int rb3d_ropcntl; + unsigned int rb3d_planemask; + + /* Viewport state */ + unsigned int se_vport_xscale; /* 0x1d98 */ + unsigned int se_vport_xoffset; + unsigned int se_vport_yscale; + unsigned int se_vport_yoffset; + unsigned int se_vport_zscale; + unsigned int se_vport_zoffset; + + /* Setup state */ + unsigned int se_cntl_status; /* 0x2140 */ + + /* Misc state */ + unsigned int re_top_left; /* 0x26c0 */ + unsigned int re_misc; +} drm_radeon_context_regs_t; + +typedef struct { + /* Zbias state */ + unsigned int se_zbias_factor; /* 0x1dac */ + unsigned int se_zbias_constant; +} drm_radeon_context2_regs_t; + +/* Setup registers for each texture unit + */ +typedef struct { + unsigned int pp_txfilter; + unsigned int pp_txformat; + unsigned int pp_txoffset; + unsigned int pp_txcblend; + unsigned int pp_txablend; + unsigned int pp_tfactor; + unsigned int pp_border_color; +} drm_radeon_texture_regs_t; + +typedef struct { + unsigned int start; + unsigned int finish; + unsigned int prim : 8; + unsigned int stateidx : 8; + unsigned int numverts : 16; /* overloaded as offset/64 for elt prims */ + unsigned int vc_format; /* vertex format */ +} drm_radeon_prim_t; + +typedef struct { + drm_radeon_context_regs_t context; + drm_radeon_texture_regs_t tex[RADEON_MAX_TEXTURE_UNITS]; + drm_radeon_context2_regs_t context2; + unsigned int dirty; +} drm_radeon_state_t; + +typedef struct { + /* The channel for communication of state information to the + * kernel on firing a vertex buffer with either of the + * obsoleted vertex/index ioctls. + */ + drm_radeon_context_regs_t context_state; + drm_radeon_texture_regs_t tex_state[RADEON_MAX_TEXTURE_UNITS]; + unsigned int dirty; + unsigned int vertsize; + unsigned int vc_format; + + /* The current cliprects, or a subset thereof. + */ + struct drm_clip_rect boxes[RADEON_NR_SAREA_CLIPRECTS]; + unsigned int nbox; + + /* Counters for client-side throttling of rendering clients. + */ + unsigned int last_frame; + unsigned int last_dispatch; + unsigned int last_clear; + + struct drm_tex_region tex_list[RADEON_NR_TEX_HEAPS] + [RADEON_NR_TEX_REGIONS + 1]; + unsigned int tex_age[RADEON_NR_TEX_HEAPS]; + int ctx_owner; + int pfState; /* number of 3d windows (0,1,2ormore) */ + int pfCurrentPage; /* which buffer is being displayed? */ + int crtc2_base; /* CRTC2 frame offset */ + int tiling_enabled; /* set by drm, read by 2d + 3d clients */ +} drm_radeon_sarea_t; + +/* WARNING: If you change any of these defines, make sure to change the + * defines in the Xserver file (xf86drmRadeon.h) + * + * KW: actually it's illegal to change any of this (backwards compatibility). + */ + +/* Radeon specific ioctls + * The device specific ioctl range is 0x40 to 0x79. + */ +#define DRM_RADEON_CP_INIT 0x00 +#define DRM_RADEON_CP_START 0x01 +#define DRM_RADEON_CP_STOP 0x02 +#define DRM_RADEON_CP_RESET 0x03 +#define DRM_RADEON_CP_IDLE 0x04 +#define DRM_RADEON_RESET 0x05 +#define DRM_RADEON_FULLSCREEN 0x06 +#define DRM_RADEON_SWAP 0x07 +#define DRM_RADEON_CLEAR 0x08 +#define DRM_RADEON_VERTEX 0x09 +#define DRM_RADEON_INDICES 0x0A +#define DRM_RADEON_NOT_USED +#define DRM_RADEON_STIPPLE 0x0C +#define DRM_RADEON_INDIRECT 0x0D +#define DRM_RADEON_TEXTURE 0x0E +#define DRM_RADEON_VERTEX2 0x0F +#define DRM_RADEON_CMDBUF 0x10 +#define DRM_RADEON_GETPARAM 0x11 +#define DRM_RADEON_FLIP 0x12 +#define DRM_RADEON_ALLOC 0x13 +#define DRM_RADEON_FREE 0x14 +#define DRM_RADEON_INIT_HEAP 0x15 +#define DRM_RADEON_IRQ_EMIT 0x16 +#define DRM_RADEON_IRQ_WAIT 0x17 +#define DRM_RADEON_CP_RESUME 0x18 +#define DRM_RADEON_SETPARAM 0x19 +#define DRM_RADEON_SURF_ALLOC 0x1a +#define DRM_RADEON_SURF_FREE 0x1b +/* KMS ioctl */ +#define DRM_RADEON_GEM_INFO 0x1c +#define DRM_RADEON_GEM_CREATE 0x1d +#define DRM_RADEON_GEM_MMAP 0x1e +#define DRM_RADEON_GEM_PREAD 0x21 +#define DRM_RADEON_GEM_PWRITE 0x22 +#define DRM_RADEON_GEM_SET_DOMAIN 0x23 +#define DRM_RADEON_GEM_WAIT_IDLE 0x24 +#define DRM_RADEON_CS 0x26 +#define DRM_RADEON_INFO 0x27 +#define DRM_RADEON_GEM_SET_TILING 0x28 +#define DRM_RADEON_GEM_GET_TILING 0x29 +#define DRM_RADEON_GEM_BUSY 0x2a +#define DRM_RADEON_GEM_VA 0x2b +#define DRM_RADEON_GEM_OP 0x2c +#define DRM_RADEON_GEM_USERPTR 0x2d + +#define DRM_IOCTL_RADEON_CP_INIT \ + DRM_IOW(DRM_COMMAND_BASE + DRM_RADEON_CP_INIT, drm_radeon_init_t) +#define DRM_IOCTL_RADEON_CP_START DRM_IO(DRM_COMMAND_BASE + DRM_RADEON_CP_START) +#define DRM_IOCTL_RADEON_CP_STOP \ + DRM_IOW(DRM_COMMAND_BASE + DRM_RADEON_CP_STOP, drm_radeon_cp_stop_t) +#define DRM_IOCTL_RADEON_CP_RESET DRM_IO(DRM_COMMAND_BASE + DRM_RADEON_CP_RESET) +#define DRM_IOCTL_RADEON_CP_IDLE DRM_IO(DRM_COMMAND_BASE + DRM_RADEON_CP_IDLE) +#define DRM_IOCTL_RADEON_RESET DRM_IO(DRM_COMMAND_BASE + DRM_RADEON_RESET) +#define DRM_IOCTL_RADEON_FULLSCREEN \ + DRM_IOW(DRM_COMMAND_BASE + DRM_RADEON_FULLSCREEN, \ + drm_radeon_fullscreen_t) +#define DRM_IOCTL_RADEON_SWAP DRM_IO(DRM_COMMAND_BASE + DRM_RADEON_SWAP) +#define DRM_IOCTL_RADEON_CLEAR \ + DRM_IOW(DRM_COMMAND_BASE + DRM_RADEON_CLEAR, drm_radeon_clear_t) +#define DRM_IOCTL_RADEON_VERTEX \ + DRM_IOW(DRM_COMMAND_BASE + DRM_RADEON_VERTEX, drm_radeon_vertex_t) +#define DRM_IOCTL_RADEON_INDICES \ + DRM_IOW(DRM_COMMAND_BASE + DRM_RADEON_INDICES, drm_radeon_indices_t) +#define DRM_IOCTL_RADEON_STIPPLE \ + DRM_IOW(DRM_COMMAND_BASE + DRM_RADEON_STIPPLE, drm_radeon_stipple_t) +#define DRM_IOCTL_RADEON_INDIRECT \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_INDIRECT, drm_radeon_indirect_t) +#define DRM_IOCTL_RADEON_TEXTURE \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_TEXTURE, drm_radeon_texture_t) +#define DRM_IOCTL_RADEON_VERTEX2 \ + DRM_IOW(DRM_COMMAND_BASE + DRM_RADEON_VERTEX2, drm_radeon_vertex2_t) +#define DRM_IOCTL_RADEON_CMDBUF \ + DRM_IOW(DRM_COMMAND_BASE + DRM_RADEON_CMDBUF, drm_radeon_cmd_buffer_t) +#define DRM_IOCTL_RADEON_GETPARAM \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GETPARAM, drm_radeon_getparam_t) +#define DRM_IOCTL_RADEON_FLIP DRM_IO(DRM_COMMAND_BASE + DRM_RADEON_FLIP) +#define DRM_IOCTL_RADEON_ALLOC \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_ALLOC, drm_radeon_mem_alloc_t) +#define DRM_IOCTL_RADEON_FREE \ + DRM_IOW(DRM_COMMAND_BASE + DRM_RADEON_FREE, drm_radeon_mem_free_t) +#define DRM_IOCTL_RADEON_INIT_HEAP \ + DRM_IOW(DRM_COMMAND_BASE + DRM_RADEON_INIT_HEAP, \ + drm_radeon_mem_init_heap_t) +#define DRM_IOCTL_RADEON_IRQ_EMIT \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_IRQ_EMIT, drm_radeon_irq_emit_t) +#define DRM_IOCTL_RADEON_IRQ_WAIT \ + DRM_IOW(DRM_COMMAND_BASE + DRM_RADEON_IRQ_WAIT, drm_radeon_irq_wait_t) +#define DRM_IOCTL_RADEON_CP_RESUME \ + DRM_IO(DRM_COMMAND_BASE + DRM_RADEON_CP_RESUME) +#define DRM_IOCTL_RADEON_SETPARAM \ + DRM_IOW(DRM_COMMAND_BASE + DRM_RADEON_SETPARAM, drm_radeon_setparam_t) +#define DRM_IOCTL_RADEON_SURF_ALLOC \ + DRM_IOW(DRM_COMMAND_BASE + DRM_RADEON_SURF_ALLOC, \ + drm_radeon_surface_alloc_t) +#define DRM_IOCTL_RADEON_SURF_FREE \ + DRM_IOW(DRM_COMMAND_BASE + DRM_RADEON_SURF_FREE, \ + drm_radeon_surface_free_t) +/* KMS */ +#define DRM_IOCTL_RADEON_GEM_INFO \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_INFO, \ + struct drm_radeon_gem_info) +#define DRM_IOCTL_RADEON_GEM_CREATE \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_CREATE, \ + struct drm_radeon_gem_create) +#define DRM_IOCTL_RADEON_GEM_MMAP \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_MMAP, \ + struct drm_radeon_gem_mmap) +#define DRM_IOCTL_RADEON_GEM_PREAD \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_PREAD, \ + struct drm_radeon_gem_pread) +#define DRM_IOCTL_RADEON_GEM_PWRITE \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_PWRITE, \ + struct drm_radeon_gem_pwrite) +#define DRM_IOCTL_RADEON_GEM_SET_DOMAIN \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_SET_DOMAIN, \ + struct drm_radeon_gem_set_domain) +#define DRM_IOCTL_RADEON_GEM_WAIT_IDLE \ + DRM_IOW(DRM_COMMAND_BASE + DRM_RADEON_GEM_WAIT_IDLE, \ + struct drm_radeon_gem_wait_idle) +#define DRM_IOCTL_RADEON_CS \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_CS, struct drm_radeon_cs) +#define DRM_IOCTL_RADEON_INFO \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_INFO, struct drm_radeon_info) +#define DRM_IOCTL_RADEON_GEM_SET_TILING \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_SET_TILING, \ + struct drm_radeon_gem_set_tiling) +#define DRM_IOCTL_RADEON_GEM_GET_TILING \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_GET_TILING, \ + struct drm_radeon_gem_get_tiling) +#define DRM_IOCTL_RADEON_GEM_BUSY \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_BUSY, \ + struct drm_radeon_gem_busy) +#define DRM_IOCTL_RADEON_GEM_VA \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_VA, struct drm_radeon_gem_va) +#define DRM_IOCTL_RADEON_GEM_OP \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_OP, struct drm_radeon_gem_op) +#define DRM_IOCTL_RADEON_GEM_USERPTR \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_USERPTR, \ + struct drm_radeon_gem_userptr) + +typedef struct drm_radeon_init { + enum { + RADEON_INIT_CP = 0x01, + RADEON_CLEANUP_CP = 0x02, + RADEON_INIT_R200_CP = 0x03, + RADEON_INIT_R300_CP = 0x04, + RADEON_INIT_R600_CP = 0x05 + } func; + unsigned long sarea_priv_offset; + int is_pci; + int cp_mode; + int gart_size; + int ring_size; + int usec_timeout; + + unsigned int fb_bpp; + unsigned int front_offset, front_pitch; + unsigned int back_offset, back_pitch; + unsigned int depth_bpp; + unsigned int depth_offset, depth_pitch; + + unsigned long fb_offset; + unsigned long mmio_offset; + unsigned long ring_offset; + unsigned long ring_rptr_offset; + unsigned long buffers_offset; + unsigned long gart_textures_offset; +} drm_radeon_init_t; + +typedef struct drm_radeon_cp_stop { + int flush; + int idle; +} drm_radeon_cp_stop_t; + +typedef struct drm_radeon_fullscreen { + enum { + RADEON_INIT_FULLSCREEN = 0x01, + RADEON_CLEANUP_FULLSCREEN = 0x02 + } func; +} drm_radeon_fullscreen_t; + +#define CLEAR_X1 0 +#define CLEAR_Y1 1 +#define CLEAR_X2 2 +#define CLEAR_Y2 3 +#define CLEAR_DEPTH 4 + +typedef union drm_radeon_clear_rect { + float f[5]; + unsigned int ui[5]; +} drm_radeon_clear_rect_t; + +typedef struct drm_radeon_clear { + unsigned int flags; + unsigned int clear_color; + unsigned int clear_depth; + unsigned int color_mask; + unsigned int depth_mask; /* misnamed field: should be stencil */ + drm_radeon_clear_rect_t *depth_boxes; +} drm_radeon_clear_t; + +typedef struct drm_radeon_vertex { + int prim; + int idx; /* Index of vertex buffer */ + int count; /* Number of vertices in buffer */ + int discard; /* Client finished with buffer? */ +} drm_radeon_vertex_t; + +typedef struct drm_radeon_indices { + int prim; + int idx; + int start; + int end; + int discard; /* Client finished with buffer? */ +} drm_radeon_indices_t; + +/* v1.2 - obsoletes drm_radeon_vertex and drm_radeon_indices + * - allows multiple primitives and state changes in a single ioctl + * - supports driver change to emit native primitives + */ +typedef struct drm_radeon_vertex2 { + int idx; /* Index of vertex buffer */ + int discard; /* Client finished with buffer? */ + int nr_states; + drm_radeon_state_t *state; + int nr_prims; + drm_radeon_prim_t *prim; +} drm_radeon_vertex2_t; + +/* v1.3 - obsoletes drm_radeon_vertex2 + * - allows arbitrarily large cliprect list + * - allows updating of tcl packet, vector and scalar state + * - allows memory-efficient description of state updates + * - allows state to be emitted without a primitive + * (for clears, ctx switches) + * - allows more than one dma buffer to be referenced per ioctl + * - supports tcl driver + * - may be extended in future versions with new cmd types, packets + */ +typedef struct drm_radeon_cmd_buffer { + int bufsz; + char *buf; + int nbox; + struct drm_clip_rect *boxes; +} drm_radeon_cmd_buffer_t; + +typedef struct drm_radeon_tex_image { + unsigned int x, y; /* Blit coordinates */ + unsigned int width, height; + const void *data; +} drm_radeon_tex_image_t; + +typedef struct drm_radeon_texture { + unsigned int offset; + int pitch; + int format; + int width; /* Texture image coordinates */ + int height; + drm_radeon_tex_image_t *image; +} drm_radeon_texture_t; + +typedef struct drm_radeon_stipple { + unsigned int *mask; +} drm_radeon_stipple_t; + +typedef struct drm_radeon_indirect { + int idx; + int start; + int end; + int discard; +} drm_radeon_indirect_t; + +/* enum for card type parameters */ +#define RADEON_CARD_PCI 0 +#define RADEON_CARD_AGP 1 +#define RADEON_CARD_PCIE 2 + +/* 1.3: An ioctl to get parameters that aren't available to the 3d + * client any other way. + */ +#define RADEON_PARAM_GART_BUFFER_OFFSET 1 /* card offset of 1st GART buffer */ +#define RADEON_PARAM_LAST_FRAME 2 +#define RADEON_PARAM_LAST_DISPATCH 3 +#define RADEON_PARAM_LAST_CLEAR 4 +/* Added with DRM version 1.6. */ +#define RADEON_PARAM_IRQ_NR 5 +#define RADEON_PARAM_GART_BASE 6 /* card offset of GART base */ +/* Added with DRM version 1.8. */ +#define RADEON_PARAM_REGISTER_HANDLE 7 /* for drmMap() */ +#define RADEON_PARAM_STATUS_HANDLE 8 +#define RADEON_PARAM_SAREA_HANDLE 9 +#define RADEON_PARAM_GART_TEX_HANDLE 10 +#define RADEON_PARAM_SCRATCH_OFFSET 11 +#define RADEON_PARAM_CARD_TYPE 12 +#define RADEON_PARAM_VBLANK_CRTC 13 /* VBLANK CRTC */ +#define RADEON_PARAM_FB_LOCATION 14 /* FB location */ +#define RADEON_PARAM_NUM_GB_PIPES 15 /* num GB pipes */ +#define RADEON_PARAM_DEVICE_ID 16 +#define RADEON_PARAM_NUM_Z_PIPES 17 /* num Z pipes */ + +typedef struct drm_radeon_getparam { + int param; + void *value; +} drm_radeon_getparam_t; + +/* 1.6: Set up a memory manager for regions of shared memory: + */ +#define RADEON_MEM_REGION_GART 1 +#define RADEON_MEM_REGION_FB 2 + +typedef struct drm_radeon_mem_alloc { + int region; + int alignment; + int size; + int *region_offset; /* offset from start of fb or GART */ +} drm_radeon_mem_alloc_t; + +typedef struct drm_radeon_mem_free { + int region; + int region_offset; +} drm_radeon_mem_free_t; + +typedef struct drm_radeon_mem_init_heap { + int region; + int size; + int start; +} drm_radeon_mem_init_heap_t; + +/* 1.6: Userspace can request & wait on irq's: + */ +typedef struct drm_radeon_irq_emit { + int *irq_seq; +} drm_radeon_irq_emit_t; + +typedef struct drm_radeon_irq_wait { + int irq_seq; +} drm_radeon_irq_wait_t; + +/* 1.10: Clients tell the DRM where they think the framebuffer is located in + * the card's address space, via a new generic ioctl to set parameters + */ + +typedef struct drm_radeon_setparam { + unsigned int param; + __s64 value; +} drm_radeon_setparam_t; + +#define RADEON_SETPARAM_FB_LOCATION 1 /* determined framebuffer location */ +#define RADEON_SETPARAM_SWITCH_TILING 2 /* enable/disable color tiling */ +#define RADEON_SETPARAM_PCIGART_LOCATION 3 /* PCI Gart Location */ +#define RADEON_SETPARAM_NEW_MEMMAP 4 /* Use new memory map */ +#define RADEON_SETPARAM_PCIGART_TABLE_SIZE 5 /* PCI GART Table Size */ +#define RADEON_SETPARAM_VBLANK_CRTC 6 /* VBLANK CRTC */ +/* 1.14: Clients can allocate/free a surface + */ +typedef struct drm_radeon_surface_alloc { + unsigned int address; + unsigned int size; + unsigned int flags; +} drm_radeon_surface_alloc_t; + +typedef struct drm_radeon_surface_free { + unsigned int address; +} drm_radeon_surface_free_t; + +#define DRM_RADEON_VBLANK_CRTC1 1 +#define DRM_RADEON_VBLANK_CRTC2 2 + +/* + * Kernel modesetting world below. + */ +#define RADEON_GEM_DOMAIN_CPU 0x1 +#define RADEON_GEM_DOMAIN_GTT 0x2 +#define RADEON_GEM_DOMAIN_VRAM 0x4 + +struct drm_radeon_gem_info { + __u64 gart_size; + __u64 vram_size; + __u64 vram_visible; +}; + +#define RADEON_GEM_NO_BACKING_STORE (1 << 0) +#define RADEON_GEM_GTT_UC (1 << 1) +#define RADEON_GEM_GTT_WC (1 << 2) +/* BO is expected to be accessed by the CPU */ +#define RADEON_GEM_CPU_ACCESS (1 << 3) +/* CPU access is not expected to work for this BO */ +#define RADEON_GEM_NO_CPU_ACCESS (1 << 4) + +struct drm_radeon_gem_create { + __u64 size; + __u64 alignment; + __u32 handle; + __u32 initial_domain; + __u32 flags; +}; + +/* + * This is not a reliable API and you should expect it to fail for any + * number of reasons and have fallback path that do not use userptr to + * perform any operation. + */ +#define RADEON_GEM_USERPTR_READONLY (1 << 0) +#define RADEON_GEM_USERPTR_ANONONLY (1 << 1) +#define RADEON_GEM_USERPTR_VALIDATE (1 << 2) +#define RADEON_GEM_USERPTR_REGISTER (1 << 3) + +struct drm_radeon_gem_userptr { + __u64 addr; + __u64 size; + __u32 flags; + __u32 handle; +}; + +#define RADEON_TILING_MACRO 0x1 +#define RADEON_TILING_MICRO 0x2 +#define RADEON_TILING_SWAP_16BIT 0x4 +#define RADEON_TILING_SWAP_32BIT 0x8 +/* this object requires a surface when mapped - i.e. front buffer */ +#define RADEON_TILING_SURFACE 0x10 +#define RADEON_TILING_MICRO_SQUARE 0x20 +#define RADEON_TILING_EG_BANKW_SHIFT 8 +#define RADEON_TILING_EG_BANKW_MASK 0xf +#define RADEON_TILING_EG_BANKH_SHIFT 12 +#define RADEON_TILING_EG_BANKH_MASK 0xf +#define RADEON_TILING_EG_MACRO_TILE_ASPECT_SHIFT 16 +#define RADEON_TILING_EG_MACRO_TILE_ASPECT_MASK 0xf +#define RADEON_TILING_EG_TILE_SPLIT_SHIFT 24 +#define RADEON_TILING_EG_TILE_SPLIT_MASK 0xf +#define RADEON_TILING_EG_STENCIL_TILE_SPLIT_SHIFT 28 +#define RADEON_TILING_EG_STENCIL_TILE_SPLIT_MASK 0xf + +struct drm_radeon_gem_set_tiling { + __u32 handle; + __u32 tiling_flags; + __u32 pitch; +}; + +struct drm_radeon_gem_get_tiling { + __u32 handle; + __u32 tiling_flags; + __u32 pitch; +}; + +struct drm_radeon_gem_mmap { + __u32 handle; + __u32 pad; + __u64 offset; + __u64 size; + __u64 addr_ptr; +}; + +struct drm_radeon_gem_set_domain { + __u32 handle; + __u32 read_domains; + __u32 write_domain; +}; + +struct drm_radeon_gem_wait_idle { + __u32 handle; + __u32 pad; +}; + +struct drm_radeon_gem_busy { + __u32 handle; + __u32 domain; +}; + +struct drm_radeon_gem_pread { + /** Handle for the object being read. */ + __u32 handle; + __u32 pad; + /** Offset into the object to read from */ + __u64 offset; + /** Length of data to read */ + __u64 size; + /** Pointer to write the data into. */ + /* void *, but pointers are not 32/64 compatible */ + __u64 data_ptr; +}; + +struct drm_radeon_gem_pwrite { + /** Handle for the object being written to. */ + __u32 handle; + __u32 pad; + /** Offset into the object to write to */ + __u64 offset; + /** Length of data to write */ + __u64 size; + /** Pointer to read the data from. */ + /* void *, but pointers are not 32/64 compatible */ + __u64 data_ptr; +}; + +/* Sets or returns a value associated with a buffer. */ +struct drm_radeon_gem_op { + __u32 handle; /* buffer */ + __u32 op; /* RADEON_GEM_OP_* */ + __u64 value; /* input or return value */ +}; + +#define RADEON_GEM_OP_GET_INITIAL_DOMAIN 0 +#define RADEON_GEM_OP_SET_INITIAL_DOMAIN 1 + +#define RADEON_VA_MAP 1 +#define RADEON_VA_UNMAP 2 + +#define RADEON_VA_RESULT_OK 0 +#define RADEON_VA_RESULT_ERROR 1 +#define RADEON_VA_RESULT_VA_EXIST 2 + +#define RADEON_VM_PAGE_VALID (1 << 0) +#define RADEON_VM_PAGE_READABLE (1 << 1) +#define RADEON_VM_PAGE_WRITEABLE (1 << 2) +#define RADEON_VM_PAGE_SYSTEM (1 << 3) +#define RADEON_VM_PAGE_SNOOPED (1 << 4) + +struct drm_radeon_gem_va { + __u32 handle; + __u32 operation; + __u32 vm_id; + __u32 flags; + __u64 offset; +}; + +#define RADEON_CHUNK_ID_RELOCS 0x01 +#define RADEON_CHUNK_ID_IB 0x02 +#define RADEON_CHUNK_ID_FLAGS 0x03 +#define RADEON_CHUNK_ID_CONST_IB 0x04 + +/* The first dword of RADEON_CHUNK_ID_FLAGS is a uint32 of these flags: */ +#define RADEON_CS_KEEP_TILING_FLAGS 0x01 +#define RADEON_CS_USE_VM 0x02 +#define RADEON_CS_END_OF_FRAME \ + 0x04 /* a hint from userspace which CS is the last one */ +/* The second dword of RADEON_CHUNK_ID_FLAGS is a uint32 that sets the ring type + */ +#define RADEON_CS_RING_GFX 0 +#define RADEON_CS_RING_COMPUTE 1 +#define RADEON_CS_RING_DMA 2 +#define RADEON_CS_RING_UVD 3 +#define RADEON_CS_RING_VCE 4 +/* The third dword of RADEON_CHUNK_ID_FLAGS is a sint32 that sets the priority + */ +/* 0 = normal, + = higher priority, - = lower priority */ + +struct drm_radeon_cs_chunk { + __u32 chunk_id; + __u32 length_dw; + __u64 chunk_data; +}; + +/* drm_radeon_cs_reloc.flags */ +#define RADEON_RELOC_PRIO_MASK (0xf << 0) + +struct drm_radeon_cs_reloc { + __u32 handle; + __u32 read_domains; + __u32 write_domain; + __u32 flags; +}; + +struct drm_radeon_cs { + __u32 num_chunks; + __u32 cs_id; + /* this points to __u64 * which point to cs chunks */ + __u64 chunks; + /* updates to the limits after this CS ioctl */ + __u64 gart_limit; + __u64 vram_limit; +}; + +#define RADEON_INFO_DEVICE_ID 0x00 +#define RADEON_INFO_NUM_GB_PIPES 0x01 +#define RADEON_INFO_NUM_Z_PIPES 0x02 +#define RADEON_INFO_ACCEL_WORKING 0x03 +#define RADEON_INFO_CRTC_FROM_ID 0x04 +#define RADEON_INFO_ACCEL_WORKING2 0x05 +#define RADEON_INFO_TILING_CONFIG 0x06 +#define RADEON_INFO_WANT_HYPERZ 0x07 +#define RADEON_INFO_WANT_CMASK 0x08 /* get access to CMASK on r300 */ +#define RADEON_INFO_CLOCK_CRYSTAL_FREQ 0x09 /* clock crystal frequency */ +#define RADEON_INFO_NUM_BACKENDS 0x0a /* DB/backends for r600+ - need for OQ \ + */ +#define RADEON_INFO_NUM_TILE_PIPES 0x0b /* tile pipes for r600+ */ +#define RADEON_INFO_FUSION_GART_WORKING \ + 0x0c /* fusion writes to GTT were broken before this */ +#define RADEON_INFO_BACKEND_MAP 0x0d /* pipe to backend map, needed by mesa */ +/* virtual address start, va < start are reserved by the kernel */ +#define RADEON_INFO_VA_START 0x0e +/* maximum size of ib using the virtual memory cs */ +#define RADEON_INFO_IB_VM_MAX_SIZE 0x0f +/* max pipes - needed for compute shaders */ +#define RADEON_INFO_MAX_PIPES 0x10 +/* timestamp for GL_ARB_timer_query (OpenGL), returns the current GPU clock */ +#define RADEON_INFO_TIMESTAMP 0x11 +/* max shader engines (SE) - needed for geometry shaders, etc. */ +#define RADEON_INFO_MAX_SE 0x12 +/* max SH per SE */ +#define RADEON_INFO_MAX_SH_PER_SE 0x13 +/* fast fb access is enabled */ +#define RADEON_INFO_FASTFB_WORKING 0x14 +/* query if a RADEON_CS_RING_* submission is supported */ +#define RADEON_INFO_RING_WORKING 0x15 +/* SI tile mode array */ +#define RADEON_INFO_SI_TILE_MODE_ARRAY 0x16 +/* query if CP DMA is supported on the compute ring */ +#define RADEON_INFO_SI_CP_DMA_COMPUTE 0x17 +/* CIK macrotile mode array */ +#define RADEON_INFO_CIK_MACROTILE_MODE_ARRAY 0x18 +/* query the number of render backends */ +#define RADEON_INFO_SI_BACKEND_ENABLED_MASK 0x19 +/* max engine clock - needed for OpenCL */ +#define RADEON_INFO_MAX_SCLK 0x1a +/* version of VCE firmware */ +#define RADEON_INFO_VCE_FW_VERSION 0x1b +/* version of VCE feedback */ +#define RADEON_INFO_VCE_FB_VERSION 0x1c +#define RADEON_INFO_NUM_BYTES_MOVED 0x1d +#define RADEON_INFO_VRAM_USAGE 0x1e +#define RADEON_INFO_GTT_USAGE 0x1f +#define RADEON_INFO_ACTIVE_CU_COUNT 0x20 +#define RADEON_INFO_CURRENT_GPU_TEMP 0x21 +#define RADEON_INFO_CURRENT_GPU_SCLK 0x22 +#define RADEON_INFO_CURRENT_GPU_MCLK 0x23 +#define RADEON_INFO_READ_REG 0x24 +#define RADEON_INFO_VA_UNMAP_WORKING 0x25 +#define RADEON_INFO_GPU_RESET_COUNTER 0x26 + +struct drm_radeon_info { + __u32 request; + __u32 pad; + __u64 value; +}; + +/* Those correspond to the tile index to use, this is to explicitly state + * the API that is implicitly defined by the tile mode array. + */ +#define SI_TILE_MODE_COLOR_LINEAR_ALIGNED 8 +#define SI_TILE_MODE_COLOR_1D 13 +#define SI_TILE_MODE_COLOR_1D_SCANOUT 9 +#define SI_TILE_MODE_COLOR_2D_8BPP 14 +#define SI_TILE_MODE_COLOR_2D_16BPP 15 +#define SI_TILE_MODE_COLOR_2D_32BPP 16 +#define SI_TILE_MODE_COLOR_2D_64BPP 17 +#define SI_TILE_MODE_COLOR_2D_SCANOUT_16BPP 11 +#define SI_TILE_MODE_COLOR_2D_SCANOUT_32BPP 12 +#define SI_TILE_MODE_DEPTH_STENCIL_1D 4 +#define SI_TILE_MODE_DEPTH_STENCIL_2D 0 +#define SI_TILE_MODE_DEPTH_STENCIL_2D_2AA 3 +#define SI_TILE_MODE_DEPTH_STENCIL_2D_4AA 3 +#define SI_TILE_MODE_DEPTH_STENCIL_2D_8AA 2 + +#define CIK_TILE_MODE_DEPTH_STENCIL_1D 5 + +#if defined(__cplusplus) +} +#endif + +#endif diff --git a/include/arch/x86_64/drm/tegra_drm.h b/include/arch/x86_64/drm/tegra_drm.h new file mode 100644 index 00000000..2219bf94 --- /dev/null +++ b/include/arch/x86_64/drm/tegra_drm.h @@ -0,0 +1,1095 @@ +/* SPDX-License-Identifier: MIT */ +/* Copyright (c) 2012-2020 NVIDIA Corporation */ + +#ifndef _TEGRA_DRM_H_ +#define _TEGRA_DRM_H_ + +#include "drm.h" + +#if defined(__cplusplus) +extern "C" { +#endif + +/* Tegra DRM legacy UAPI. Only enabled with STAGING */ + +#define DRM_TEGRA_GEM_CREATE_TILED (1 << 0) +#define DRM_TEGRA_GEM_CREATE_BOTTOM_UP (1 << 1) + +/** + * struct drm_tegra_gem_create - parameters for the GEM object creation IOCTL + */ +struct drm_tegra_gem_create { + /** + * @size: + * + * The size, in bytes, of the buffer object to be created. + */ + __u64 size; + + /** + * @flags: + * + * A bitmask of flags that influence the creation of GEM objects: + * + * DRM_TEGRA_GEM_CREATE_TILED + * Use the 16x16 tiling format for this buffer. + * + * DRM_TEGRA_GEM_CREATE_BOTTOM_UP + * The buffer has a bottom-up layout. + */ + __u32 flags; + + /** + * @handle: + * + * The handle of the created GEM object. Set by the kernel upon + * successful completion of the IOCTL. + */ + __u32 handle; +}; + +/** + * struct drm_tegra_gem_mmap - parameters for the GEM mmap IOCTL + */ +struct drm_tegra_gem_mmap { + /** + * @handle: + * + * Handle of the GEM object to obtain an mmap offset for. + */ + __u32 handle; + + /** + * @pad: + * + * Structure padding that may be used in the future. Must be 0. + */ + __u32 pad; + + /** + * @offset: + * + * The mmap offset for the given GEM object. Set by the kernel upon + * successful completion of the IOCTL. + */ + __u64 offset; +}; + +/** + * struct drm_tegra_syncpt_read - parameters for the read syncpoint IOCTL + */ +struct drm_tegra_syncpt_read { + /** + * @id: + * + * ID of the syncpoint to read the current value from. + */ + __u32 id; + + /** + * @value: + * + * The current syncpoint value. Set by the kernel upon successful + * completion of the IOCTL. + */ + __u32 value; +}; + +/** + * struct drm_tegra_syncpt_incr - parameters for the increment syncpoint IOCTL + */ +struct drm_tegra_syncpt_incr { + /** + * @id: + * + * ID of the syncpoint to increment. + */ + __u32 id; + + /** + * @pad: + * + * Structure padding that may be used in the future. Must be 0. + */ + __u32 pad; +}; + +/** + * struct drm_tegra_syncpt_wait - parameters for the wait syncpoint IOCTL + */ +struct drm_tegra_syncpt_wait { + /** + * @id: + * + * ID of the syncpoint to wait on. + */ + __u32 id; + + /** + * @thresh: + * + * Threshold value for which to wait. + */ + __u32 thresh; + + /** + * @timeout: + * + * Timeout, in milliseconds, to wait. + */ + __u32 timeout; + + /** + * @value: + * + * The new syncpoint value after the wait. Set by the kernel upon + * successful completion of the IOCTL. + */ + __u32 value; +}; + +#define DRM_TEGRA_NO_TIMEOUT (0xffffffff) + +/** + * struct drm_tegra_open_channel - parameters for the open channel IOCTL + */ +struct drm_tegra_open_channel { + /** + * @client: + * + * The client ID for this channel. + */ + __u32 client; + + /** + * @pad: + * + * Structure padding that may be used in the future. Must be 0. + */ + __u32 pad; + + /** + * @context: + * + * The application context of this channel. Set by the kernel upon + * successful completion of the IOCTL. This context needs to be passed + * to the DRM_TEGRA_CHANNEL_CLOSE or the DRM_TEGRA_SUBMIT IOCTLs. + */ + __u64 context; +}; + +/** + * struct drm_tegra_close_channel - parameters for the close channel IOCTL + */ +struct drm_tegra_close_channel { + /** + * @context: + * + * The application context of this channel. This is obtained from the + * DRM_TEGRA_OPEN_CHANNEL IOCTL. + */ + __u64 context; +}; + +/** + * struct drm_tegra_get_syncpt - parameters for the get syncpoint IOCTL + */ +struct drm_tegra_get_syncpt { + /** + * @context: + * + * The application context identifying the channel for which to obtain + * the syncpoint ID. + */ + __u64 context; + + /** + * @index: + * + * Index of the client syncpoint for which to obtain the ID. + */ + __u32 index; + + /** + * @id: + * + * The ID of the given syncpoint. Set by the kernel upon successful + * completion of the IOCTL. + */ + __u32 id; +}; + +/** + * struct drm_tegra_get_syncpt_base - parameters for the get wait base IOCTL + */ +struct drm_tegra_get_syncpt_base { + /** + * @context: + * + * The application context identifying for which channel to obtain the + * wait base. + */ + __u64 context; + + /** + * @syncpt: + * + * ID of the syncpoint for which to obtain the wait base. + */ + __u32 syncpt; + + /** + * @id: + * + * The ID of the wait base corresponding to the client syncpoint. Set + * by the kernel upon successful completion of the IOCTL. + */ + __u32 id; +}; + +/** + * struct drm_tegra_syncpt - syncpoint increment operation + */ +struct drm_tegra_syncpt { + /** + * @id: + * + * ID of the syncpoint to operate on. + */ + __u32 id; + + /** + * @incrs: + * + * Number of increments to perform for the syncpoint. + */ + __u32 incrs; +}; + +/** + * struct drm_tegra_cmdbuf - structure describing a command buffer + */ +struct drm_tegra_cmdbuf { + /** + * @handle: + * + * Handle to a GEM object containing the command buffer. + */ + __u32 handle; + + /** + * @offset: + * + * Offset, in bytes, into the GEM object identified by @handle at + * which the command buffer starts. + */ + __u32 offset; + + /** + * @words: + * + * Number of 32-bit words in this command buffer. + */ + __u32 words; + + /** + * @pad: + * + * Structure padding that may be used in the future. Must be 0. + */ + __u32 pad; +}; + +/** + * struct drm_tegra_reloc - GEM object relocation structure + */ +struct drm_tegra_reloc { + struct { + /** + * @cmdbuf.handle: + * + * Handle to the GEM object containing the command buffer for + * which to perform this GEM object relocation. + */ + __u32 handle; + + /** + * @cmdbuf.offset: + * + * Offset, in bytes, into the command buffer at which to + * insert the relocated address. + */ + __u32 offset; + } cmdbuf; + struct { + /** + * @target.handle: + * + * Handle to the GEM object to be relocated. + */ + __u32 handle; + + /** + * @target.offset: + * + * Offset, in bytes, into the target GEM object at which the + * relocated data starts. + */ + __u32 offset; + } target; + + /** + * @shift: + * + * The number of bits by which to shift relocated addresses. + */ + __u32 shift; + + /** + * @pad: + * + * Structure padding that may be used in the future. Must be 0. + */ + __u32 pad; +}; + +/** + * struct drm_tegra_waitchk - wait check structure + */ +struct drm_tegra_waitchk { + /** + * @handle: + * + * Handle to the GEM object containing a command stream on which to + * perform the wait check. + */ + __u32 handle; + + /** + * @offset: + * + * Offset, in bytes, of the location in the command stream to perform + * the wait check on. + */ + __u32 offset; + + /** + * @syncpt: + * + * ID of the syncpoint to wait check. + */ + __u32 syncpt; + + /** + * @thresh: + * + * Threshold value for which to check. + */ + __u32 thresh; +}; + +/** + * struct drm_tegra_submit - job submission structure + */ +struct drm_tegra_submit { + /** + * @context: + * + * The application context identifying the channel to use for the + * execution of this job. + */ + __u64 context; + + /** + * @num_syncpts: + * + * The number of syncpoints operated on by this job. This defines the + * length of the array pointed to by @syncpts. + */ + __u32 num_syncpts; + + /** + * @num_cmdbufs: + * + * The number of command buffers to execute as part of this job. This + * defines the length of the array pointed to by @cmdbufs. + */ + __u32 num_cmdbufs; + + /** + * @num_relocs: + * + * The number of relocations to perform before executing this job. + * This defines the length of the array pointed to by @relocs. + */ + __u32 num_relocs; + + /** + * @num_waitchks: + * + * The number of wait checks to perform as part of this job. This + * defines the length of the array pointed to by @waitchks. + */ + __u32 num_waitchks; + + /** + * @waitchk_mask: + * + * Bitmask of valid wait checks. + */ + __u32 waitchk_mask; + + /** + * @timeout: + * + * Timeout, in milliseconds, before this job is cancelled. + */ + __u32 timeout; + + /** + * @syncpts: + * + * A pointer to an array of &struct drm_tegra_syncpt structures that + * specify the syncpoint operations performed as part of this job. + * The number of elements in the array must be equal to the value + * given by @num_syncpts. + */ + __u64 syncpts; + + /** + * @cmdbufs: + * + * A pointer to an array of &struct drm_tegra_cmdbuf structures that + * define the command buffers to execute as part of this job. The + * number of elements in the array must be equal to the value given + * by @num_syncpts. + */ + __u64 cmdbufs; + + /** + * @relocs: + * + * A pointer to an array of &struct drm_tegra_reloc structures that + * specify the relocations that need to be performed before executing + * this job. The number of elements in the array must be equal to the + * value given by @num_relocs. + */ + __u64 relocs; + + /** + * @waitchks: + * + * A pointer to an array of &struct drm_tegra_waitchk structures that + * specify the wait checks to be performed while executing this job. + * The number of elements in the array must be equal to the value + * given by @num_waitchks. + */ + __u64 waitchks; + + /** + * @fence: + * + * The threshold of the syncpoint associated with this job after it + * has been completed. Set by the kernel upon successful completion of + * the IOCTL. This can be used with the DRM_TEGRA_SYNCPT_WAIT IOCTL to + * wait for this job to be finished. + */ + __u32 fence; + + /** + * @reserved: + * + * This field is reserved for future use. Must be 0. + */ + __u32 reserved[5]; +}; + +#define DRM_TEGRA_GEM_TILING_MODE_PITCH 0 +#define DRM_TEGRA_GEM_TILING_MODE_TILED 1 +#define DRM_TEGRA_GEM_TILING_MODE_BLOCK 2 + +/** + * struct drm_tegra_gem_set_tiling - parameters for the set tiling IOCTL + */ +struct drm_tegra_gem_set_tiling { + /** + * @handle: + * + * Handle to the GEM object for which to set the tiling parameters. + */ + __u32 handle; + + /** + * @mode: + * + * The tiling mode to set. Must be one of: + * + * DRM_TEGRA_GEM_TILING_MODE_PITCH + * pitch linear format + * + * DRM_TEGRA_GEM_TILING_MODE_TILED + * 16x16 tiling format + * + * DRM_TEGRA_GEM_TILING_MODE_BLOCK + * 16Bx2 tiling format + */ + __u32 mode; + + /** + * @value: + * + * The value to set for the tiling mode parameter. + */ + __u32 value; + + /** + * @pad: + * + * Structure padding that may be used in the future. Must be 0. + */ + __u32 pad; +}; + +/** + * struct drm_tegra_gem_get_tiling - parameters for the get tiling IOCTL + */ +struct drm_tegra_gem_get_tiling { + /** + * @handle: + * + * Handle to the GEM object for which to query the tiling parameters. + */ + __u32 handle; + + /** + * @mode: + * + * The tiling mode currently associated with the GEM object. Set by + * the kernel upon successful completion of the IOCTL. + */ + __u32 mode; + + /** + * @value: + * + * The tiling mode parameter currently associated with the GEM object. + * Set by the kernel upon successful completion of the IOCTL. + */ + __u32 value; + + /** + * @pad: + * + * Structure padding that may be used in the future. Must be 0. + */ + __u32 pad; +}; + +#define DRM_TEGRA_GEM_BOTTOM_UP (1 << 0) +#define DRM_TEGRA_GEM_FLAGS (DRM_TEGRA_GEM_BOTTOM_UP) + +/** + * struct drm_tegra_gem_set_flags - parameters for the set flags IOCTL + */ +struct drm_tegra_gem_set_flags { + /** + * @handle: + * + * Handle to the GEM object for which to set the flags. + */ + __u32 handle; + + /** + * @flags: + * + * The flags to set for the GEM object. + */ + __u32 flags; +}; + +/** + * struct drm_tegra_gem_get_flags - parameters for the get flags IOCTL + */ +struct drm_tegra_gem_get_flags { + /** + * @handle: + * + * Handle to the GEM object for which to query the flags. + */ + __u32 handle; + + /** + * @flags: + * + * The flags currently associated with the GEM object. Set by the + * kernel upon successful completion of the IOCTL. + */ + __u32 flags; +}; + +#define DRM_TEGRA_GEM_CREATE 0x00 +#define DRM_TEGRA_GEM_MMAP 0x01 +#define DRM_TEGRA_SYNCPT_READ 0x02 +#define DRM_TEGRA_SYNCPT_INCR 0x03 +#define DRM_TEGRA_SYNCPT_WAIT 0x04 +#define DRM_TEGRA_OPEN_CHANNEL 0x05 +#define DRM_TEGRA_CLOSE_CHANNEL 0x06 +#define DRM_TEGRA_GET_SYNCPT 0x07 +#define DRM_TEGRA_SUBMIT 0x08 +#define DRM_TEGRA_GET_SYNCPT_BASE 0x09 +#define DRM_TEGRA_GEM_SET_TILING 0x0a +#define DRM_TEGRA_GEM_GET_TILING 0x0b +#define DRM_TEGRA_GEM_SET_FLAGS 0x0c +#define DRM_TEGRA_GEM_GET_FLAGS 0x0d + +#define DRM_IOCTL_TEGRA_GEM_CREATE \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_GEM_CREATE, \ + struct drm_tegra_gem_create) +#define DRM_IOCTL_TEGRA_GEM_MMAP \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_GEM_MMAP, \ + struct drm_tegra_gem_mmap) +#define DRM_IOCTL_TEGRA_SYNCPT_READ \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_SYNCPT_READ, \ + struct drm_tegra_syncpt_read) +#define DRM_IOCTL_TEGRA_SYNCPT_INCR \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_SYNCPT_INCR, \ + struct drm_tegra_syncpt_incr) +#define DRM_IOCTL_TEGRA_SYNCPT_WAIT \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_SYNCPT_WAIT, \ + struct drm_tegra_syncpt_wait) +#define DRM_IOCTL_TEGRA_OPEN_CHANNEL \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_OPEN_CHANNEL, \ + struct drm_tegra_open_channel) +#define DRM_IOCTL_TEGRA_CLOSE_CHANNEL \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_CLOSE_CHANNEL, \ + struct drm_tegra_close_channel) +#define DRM_IOCTL_TEGRA_GET_SYNCPT \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_GET_SYNCPT, \ + struct drm_tegra_get_syncpt) +#define DRM_IOCTL_TEGRA_SUBMIT \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_SUBMIT, struct drm_tegra_submit) +#define DRM_IOCTL_TEGRA_GET_SYNCPT_BASE \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_GET_SYNCPT_BASE, \ + struct drm_tegra_get_syncpt_base) +#define DRM_IOCTL_TEGRA_GEM_SET_TILING \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_GEM_SET_TILING, \ + struct drm_tegra_gem_set_tiling) +#define DRM_IOCTL_TEGRA_GEM_GET_TILING \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_GEM_GET_TILING, \ + struct drm_tegra_gem_get_tiling) +#define DRM_IOCTL_TEGRA_GEM_SET_FLAGS \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_GEM_SET_FLAGS, \ + struct drm_tegra_gem_set_flags) +#define DRM_IOCTL_TEGRA_GEM_GET_FLAGS \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_GEM_GET_FLAGS, \ + struct drm_tegra_gem_get_flags) + +/* New Tegra DRM UAPI */ + +/* + * Reported by the driver in the `capabilities` field. + * + * DRM_TEGRA_CHANNEL_CAP_CACHE_COHERENT: If set, the engine is cache coherent + * with regard to the system memory. + */ +#define DRM_TEGRA_CHANNEL_CAP_CACHE_COHERENT (1 << 0) + +struct drm_tegra_channel_open { + /** + * @host1x_class: [in] + * + * Host1x class of the engine that will be programmed using this + * channel. + */ + __u32 host1x_class; + + /** + * @flags: [in] + * + * Flags. + */ + __u32 flags; + + /** + * @context: [out] + * + * Opaque identifier corresponding to the opened channel. + */ + __u32 context; + + /** + * @version: [out] + * + * Version of the engine hardware. This can be used by userspace + * to determine how the engine needs to be programmed. + */ + __u32 version; + + /** + * @capabilities: [out] + * + * Flags describing the hardware capabilities. + */ + __u32 capabilities; + __u32 padding; +}; + +struct drm_tegra_channel_close { + /** + * @context: [in] + * + * Identifier of the channel to close. + */ + __u32 context; + __u32 padding; +}; + +/* + * Mapping flags that can be used to influence how the mapping is created. + * + * DRM_TEGRA_CHANNEL_MAP_READ: create mapping that allows HW read access + * DRM_TEGRA_CHANNEL_MAP_WRITE: create mapping that allows HW write access + */ +#define DRM_TEGRA_CHANNEL_MAP_READ (1 << 0) +#define DRM_TEGRA_CHANNEL_MAP_WRITE (1 << 1) +#define DRM_TEGRA_CHANNEL_MAP_READ_WRITE \ + (DRM_TEGRA_CHANNEL_MAP_READ | DRM_TEGRA_CHANNEL_MAP_WRITE) + +struct drm_tegra_channel_map { + /** + * @context: [in] + * + * Identifier of the channel to which make memory available for. + */ + __u32 context; + + /** + * @handle: [in] + * + * GEM handle of the memory to map. + */ + __u32 handle; + + /** + * @flags: [in] + * + * Flags. + */ + __u32 flags; + + /** + * @mapping: [out] + * + * Identifier corresponding to the mapping, to be used for + * relocations or unmapping later. + */ + __u32 mapping; +}; + +struct drm_tegra_channel_unmap { + /** + * @context: [in] + * + * Channel identifier of the channel to unmap memory from. + */ + __u32 context; + + /** + * @mapping: [in] + * + * Mapping identifier of the memory mapping to unmap. + */ + __u32 mapping; +}; + +/* Submission */ + +/** + * Specify that bit 39 of the patched-in address should be set to switch + * swizzling between Tegra and non-Tegra sector layout on systems that store + * surfaces in system memory in non-Tegra sector layout. + */ +#define DRM_TEGRA_SUBMIT_RELOC_SECTOR_LAYOUT (1 << 0) + +struct drm_tegra_submit_buf { + /** + * @mapping: [in] + * + * Identifier of the mapping to use in the submission. + */ + __u32 mapping; + + /** + * @flags: [in] + * + * Flags. + */ + __u32 flags; + + /** + * Information for relocation patching. + */ + struct { + /** + * @target_offset: [in] + * + * Offset from the start of the mapping of the data whose + * address is to be patched into the gather. + */ + __u64 target_offset; + + /** + * @gather_offset_words: [in] + * + * Offset in words from the start of the gather data to + * where the address should be patched into. + */ + __u32 gather_offset_words; + + /** + * @shift: [in] + * + * Number of bits the address should be shifted right before + * patching in. + */ + __u32 shift; + } reloc; +}; + +/** + * Execute `words` words of Host1x opcodes specified in the `gather_data_ptr` + * buffer. Each GATHER_UPTR command uses successive words from the buffer. + */ +#define DRM_TEGRA_SUBMIT_CMD_GATHER_UPTR 0 +/** + * Wait for a syncpoint to reach a value before continuing with further + * commands. + */ +#define DRM_TEGRA_SUBMIT_CMD_WAIT_SYNCPT 1 +/** + * Wait for a syncpoint to reach a value before continuing with further + * commands. The threshold is calculated relative to the start of the job. + */ +#define DRM_TEGRA_SUBMIT_CMD_WAIT_SYNCPT_RELATIVE 2 + +struct drm_tegra_submit_cmd_gather_uptr { + __u32 words; + __u32 reserved[3]; +}; + +struct drm_tegra_submit_cmd_wait_syncpt { + __u32 id; + __u32 value; + __u32 reserved[2]; +}; + +struct drm_tegra_submit_cmd { + /** + * @type: [in] + * + * Command type to execute. One of the DRM_TEGRA_SUBMIT_CMD* + * defines. + */ + __u32 type; + + /** + * @flags: [in] + * + * Flags. + */ + __u32 flags; + + union { + struct drm_tegra_submit_cmd_gather_uptr gather_uptr; + struct drm_tegra_submit_cmd_wait_syncpt wait_syncpt; + __u32 reserved[4]; + }; +}; + +struct drm_tegra_submit_syncpt { + /** + * @id: [in] + * + * ID of the syncpoint that the job will increment. + */ + __u32 id; + + /** + * @flags: [in] + * + * Flags. + */ + __u32 flags; + + /** + * @increments: [in] + * + * Number of times the job will increment this syncpoint. + */ + __u32 increments; + + /** + * @value: [out] + * + * Value the syncpoint will have once the job has completed all + * its specified syncpoint increments. + * + * Note that the kernel may increment the syncpoint before or after + * the job. These increments are not reflected in this field. + * + * If the job hangs or times out, not all of the increments may + * get executed. + */ + __u32 value; +}; + +struct drm_tegra_channel_submit { + /** + * @context: [in] + * + * Identifier of the channel to submit this job to. + */ + __u32 context; + + /** + * @num_bufs: [in] + * + * Number of elements in the `bufs_ptr` array. + */ + __u32 num_bufs; + + /** + * @num_cmds: [in] + * + * Number of elements in the `cmds_ptr` array. + */ + __u32 num_cmds; + + /** + * @gather_data_words: [in] + * + * Number of 32-bit words in the `gather_data_ptr` array. + */ + __u32 gather_data_words; + + /** + * @bufs_ptr: [in] + * + * Pointer to an array of drm_tegra_submit_buf structures. + */ + __u64 bufs_ptr; + + /** + * @cmds_ptr: [in] + * + * Pointer to an array of drm_tegra_submit_cmd structures. + */ + __u64 cmds_ptr; + + /** + * @gather_data_ptr: [in] + * + * Pointer to an array of Host1x opcodes to be used by GATHER_UPTR + * commands. + */ + __u64 gather_data_ptr; + + /** + * @syncobj_in: [in] + * + * Handle for DRM syncobj that will be waited before submission. + * Ignored if zero. + */ + __u32 syncobj_in; + + /** + * @syncobj_out: [in] + * + * Handle for DRM syncobj that will have its fence replaced with + * the job's completion fence. Ignored if zero. + */ + __u32 syncobj_out; + + /** + * @syncpt_incr: [in,out] + * + * Information about the syncpoint the job will increment. + */ + struct drm_tegra_submit_syncpt syncpt; +}; + +struct drm_tegra_syncpoint_allocate { + /** + * @id: [out] + * + * ID of allocated syncpoint. + */ + __u32 id; + __u32 padding; +}; + +struct drm_tegra_syncpoint_free { + /** + * @id: [in] + * + * ID of syncpoint to free. + */ + __u32 id; + __u32 padding; +}; + +struct drm_tegra_syncpoint_wait { + /** + * @timeout: [in] + * + * Absolute timestamp at which the wait will time out. + */ + __s64 timeout_ns; + + /** + * @id: [in] + * + * ID of syncpoint to wait on. + */ + __u32 id; + + /** + * @threshold: [in] + * + * Threshold to wait for. + */ + __u32 threshold; + + /** + * @value: [out] + * + * Value of the syncpoint upon wait completion. + */ + __u32 value; + + __u32 padding; +}; + +#define DRM_IOCTL_TEGRA_CHANNEL_OPEN \ + DRM_IOWR(DRM_COMMAND_BASE + 0x10, struct drm_tegra_channel_open) +#define DRM_IOCTL_TEGRA_CHANNEL_CLOSE \ + DRM_IOWR(DRM_COMMAND_BASE + 0x11, struct drm_tegra_channel_close) +#define DRM_IOCTL_TEGRA_CHANNEL_MAP \ + DRM_IOWR(DRM_COMMAND_BASE + 0x12, struct drm_tegra_channel_map) +#define DRM_IOCTL_TEGRA_CHANNEL_UNMAP \ + DRM_IOWR(DRM_COMMAND_BASE + 0x13, struct drm_tegra_channel_unmap) +#define DRM_IOCTL_TEGRA_CHANNEL_SUBMIT \ + DRM_IOWR(DRM_COMMAND_BASE + 0x14, struct drm_tegra_channel_submit) + +#define DRM_IOCTL_TEGRA_SYNCPOINT_ALLOCATE \ + DRM_IOWR(DRM_COMMAND_BASE + 0x20, struct drm_tegra_syncpoint_allocate) +#define DRM_IOCTL_TEGRA_SYNCPOINT_FREE \ + DRM_IOWR(DRM_COMMAND_BASE + 0x21, struct drm_tegra_syncpoint_free) +#define DRM_IOCTL_TEGRA_SYNCPOINT_WAIT \ + DRM_IOWR(DRM_COMMAND_BASE + 0x22, struct drm_tegra_syncpoint_wait) + +#if defined(__cplusplus) +} +#endif + +#endif diff --git a/include/arch/x86_64/drm/v3d_drm.h b/include/arch/x86_64/drm/v3d_drm.h new file mode 100644 index 00000000..1ae21341 --- /dev/null +++ b/include/arch/x86_64/drm/v3d_drm.h @@ -0,0 +1,813 @@ +/* + * Copyright © 2014-2018 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef _V3D_DRM_H_ +#define _V3D_DRM_H_ + +#include "drm.h" + +#if defined(__cplusplus) +extern "C" { +#endif + +#define DRM_V3D_SUBMIT_CL 0x00 +#define DRM_V3D_WAIT_BO 0x01 +#define DRM_V3D_CREATE_BO 0x02 +#define DRM_V3D_MMAP_BO 0x03 +#define DRM_V3D_GET_PARAM 0x04 +#define DRM_V3D_GET_BO_OFFSET 0x05 +#define DRM_V3D_SUBMIT_TFU 0x06 +#define DRM_V3D_SUBMIT_CSD 0x07 +#define DRM_V3D_PERFMON_CREATE 0x08 +#define DRM_V3D_PERFMON_DESTROY 0x09 +#define DRM_V3D_PERFMON_GET_VALUES 0x0a +#define DRM_V3D_SUBMIT_CPU 0x0b +#define DRM_V3D_PERFMON_GET_COUNTER 0x0c +#define DRM_V3D_PERFMON_SET_GLOBAL 0x0d + +#define DRM_IOCTL_V3D_SUBMIT_CL \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_CL, struct drm_v3d_submit_cl) +#define DRM_IOCTL_V3D_WAIT_BO \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_WAIT_BO, struct drm_v3d_wait_bo) +#define DRM_IOCTL_V3D_CREATE_BO \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_CREATE_BO, struct drm_v3d_create_bo) +#define DRM_IOCTL_V3D_MMAP_BO \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_MMAP_BO, struct drm_v3d_mmap_bo) +#define DRM_IOCTL_V3D_GET_PARAM \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_GET_PARAM, struct drm_v3d_get_param) +#define DRM_IOCTL_V3D_GET_BO_OFFSET \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_GET_BO_OFFSET, \ + struct drm_v3d_get_bo_offset) +#define DRM_IOCTL_V3D_SUBMIT_TFU \ + DRM_IOW(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_TFU, \ + struct drm_v3d_submit_tfu) +#define DRM_IOCTL_V3D_SUBMIT_CSD \ + DRM_IOW(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_CSD, \ + struct drm_v3d_submit_csd) +#define DRM_IOCTL_V3D_PERFMON_CREATE \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_PERFMON_CREATE, \ + struct drm_v3d_perfmon_create) +#define DRM_IOCTL_V3D_PERFMON_DESTROY \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_PERFMON_DESTROY, \ + struct drm_v3d_perfmon_destroy) +#define DRM_IOCTL_V3D_PERFMON_GET_VALUES \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_PERFMON_GET_VALUES, \ + struct drm_v3d_perfmon_get_values) +#define DRM_IOCTL_V3D_SUBMIT_CPU \ + DRM_IOW(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_CPU, \ + struct drm_v3d_submit_cpu) +#define DRM_IOCTL_V3D_PERFMON_GET_COUNTER \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_PERFMON_GET_COUNTER, \ + struct drm_v3d_perfmon_get_counter) +#define DRM_IOCTL_V3D_PERFMON_SET_GLOBAL \ + DRM_IOW(DRM_COMMAND_BASE + DRM_V3D_PERFMON_SET_GLOBAL, \ + struct drm_v3d_perfmon_set_global) + +#define DRM_V3D_SUBMIT_CL_FLUSH_CACHE 0x01 +#define DRM_V3D_SUBMIT_EXTENSION 0x02 + +/* struct drm_v3d_extension - ioctl extensions + * + * Linked-list of generic extensions where the id identify which struct is + * pointed by ext_data. Therefore, DRM_V3D_EXT_ID_* is used on id to identify + * the extension type. + */ +struct drm_v3d_extension { + __u64 next; + __u32 id; +#define DRM_V3D_EXT_ID_MULTI_SYNC 0x01 +#define DRM_V3D_EXT_ID_CPU_INDIRECT_CSD 0x02 +#define DRM_V3D_EXT_ID_CPU_TIMESTAMP_QUERY 0x03 +#define DRM_V3D_EXT_ID_CPU_RESET_TIMESTAMP_QUERY 0x04 +#define DRM_V3D_EXT_ID_CPU_COPY_TIMESTAMP_QUERY 0x05 +#define DRM_V3D_EXT_ID_CPU_RESET_PERFORMANCE_QUERY 0x06 +#define DRM_V3D_EXT_ID_CPU_COPY_PERFORMANCE_QUERY 0x07 + __u32 flags; /* mbz */ +}; + +/* struct drm_v3d_sem - wait/signal semaphore + * + * If binary semaphore, it only takes syncobj handle and ignores flags and + * point fields. Point is defined for timeline syncobj feature. + */ +struct drm_v3d_sem { + __u32 handle; /* syncobj */ + /* rsv below, for future uses */ + __u32 flags; + __u64 point; /* for timeline sem support */ + __u64 mbz[2]; /* must be zero, rsv */ +}; + +/* Enum for each of the V3D queues. */ +enum v3d_queue { + V3D_BIN, + V3D_RENDER, + V3D_TFU, + V3D_CSD, + V3D_CACHE_CLEAN, + V3D_CPU, +}; + +/** + * struct drm_v3d_multi_sync - ioctl extension to add support multiples + * syncobjs for commands submission. + * + * When an extension of DRM_V3D_EXT_ID_MULTI_SYNC id is defined, it points to + * this extension to define wait and signal dependencies, instead of single + * in/out sync entries on submitting commands. The field flags is used to + * determine the stage to set wait dependencies. + */ +struct drm_v3d_multi_sync { + struct drm_v3d_extension base; + /* Array of wait and signal semaphores */ + __u64 in_syncs; + __u64 out_syncs; + + /* Number of entries */ + __u32 in_sync_count; + __u32 out_sync_count; + + /* set the stage (v3d_queue) to sync */ + __u32 wait_stage; + + __u32 pad; /* mbz */ +}; + +/** + * struct drm_v3d_submit_cl - ioctl argument for submitting commands to the 3D + * engine. + * + * This asks the kernel to have the GPU execute an optional binner + * command list, and a render command list. + * + * The L1T, slice, L2C, L2T, and GCA caches will be flushed before + * each CL executes. The VCD cache should be flushed (if necessary) + * by the submitted CLs. The TLB writes are guaranteed to have been + * flushed by the time the render done IRQ happens, which is the + * trigger for out_sync. Any dirtying of cachelines by the job (only + * possible using TMU writes) must be flushed by the caller using the + * DRM_V3D_SUBMIT_CL_FLUSH_CACHE_FLAG flag. + */ +struct drm_v3d_submit_cl { + /* Pointer to the binner command list. + * + * This is the first set of commands executed, which runs the + * coordinate shader to determine where primitives land on the screen, + * then writes out the state updates and draw calls necessary per tile + * to the tile allocation BO. + * + * This BCL will block on any previous BCL submitted on the + * same FD, but not on any RCL or BCLs submitted by other + * clients -- that is left up to the submitter to control + * using in_sync_bcl if necessary. + */ + __u32 bcl_start; + + /** End address of the BCL (first byte after the BCL) */ + __u32 bcl_end; + + /* Offset of the render command list. + * + * This is the second set of commands executed, which will either + * execute the tiles that have been set up by the BCL, or a fixed set + * of tiles (in the case of RCL-only blits). + * + * This RCL will block on this submit's BCL, and any previous + * RCL submitted on the same FD, but not on any RCL or BCLs + * submitted by other clients -- that is left up to the + * submitter to control using in_sync_rcl if necessary. + */ + __u32 rcl_start; + + /** End address of the RCL (first byte after the RCL) */ + __u32 rcl_end; + + /** An optional sync object to wait on before starting the BCL. */ + __u32 in_sync_bcl; + /** An optional sync object to wait on before starting the RCL. */ + __u32 in_sync_rcl; + /** An optional sync object to place the completion fence in. */ + __u32 out_sync; + + /* Offset of the tile alloc memory + * + * This is optional on V3D 3.3 (where the CL can set the value) but + * required on V3D 4.1. + */ + __u32 qma; + + /** Size of the tile alloc memory. */ + __u32 qms; + + /** Offset of the tile state data array. */ + __u32 qts; + + /* Pointer to a u32 array of the BOs that are referenced by the job. + */ + __u64 bo_handles; + + /* Number of BO handles passed in (size is that times 4). */ + __u32 bo_handle_count; + + /* DRM_V3D_SUBMIT_* properties */ + __u32 flags; + + /* ID of the perfmon to attach to this job. 0 means no perfmon. */ + __u32 perfmon_id; + + __u32 pad; + + /* Pointer to an array of ioctl extensions*/ + __u64 extensions; +}; + +/** + * struct drm_v3d_wait_bo - ioctl argument for waiting for + * completion of the last DRM_V3D_SUBMIT_CL on a BO. + * + * This is useful for cases where multiple processes might be + * rendering to a BO and you want to wait for all rendering to be + * completed. + */ +struct drm_v3d_wait_bo { + __u32 handle; + __u32 pad; + __u64 timeout_ns; +}; + +/** + * struct drm_v3d_create_bo - ioctl argument for creating V3D BOs. + * + * There are currently no values for the flags argument, but it may be + * used in a future extension. + */ +struct drm_v3d_create_bo { + __u32 size; + __u32 flags; + /** Returned GEM handle for the BO. */ + __u32 handle; + /** + * Returned offset for the BO in the V3D address space. This offset + * is private to the DRM fd and is valid for the lifetime of the GEM + * handle. + * + * This offset value will always be nonzero, since various HW + * units treat 0 specially. + */ + __u32 offset; +}; + +/** + * struct drm_v3d_mmap_bo - ioctl argument for mapping V3D BOs. + * + * This doesn't actually perform an mmap. Instead, it returns the + * offset you need to use in an mmap on the DRM device node. This + * means that tools like valgrind end up knowing about the mapped + * memory. + * + * There are currently no values for the flags argument, but it may be + * used in a future extension. + */ +struct drm_v3d_mmap_bo { + /** Handle for the object being mapped. */ + __u32 handle; + __u32 flags; + /** offset into the drm node to use for subsequent mmap call. */ + __u64 offset; +}; + +enum drm_v3d_param { + DRM_V3D_PARAM_V3D_UIFCFG, + DRM_V3D_PARAM_V3D_HUB_IDENT1, + DRM_V3D_PARAM_V3D_HUB_IDENT2, + DRM_V3D_PARAM_V3D_HUB_IDENT3, + DRM_V3D_PARAM_V3D_CORE0_IDENT0, + DRM_V3D_PARAM_V3D_CORE0_IDENT1, + DRM_V3D_PARAM_V3D_CORE0_IDENT2, + DRM_V3D_PARAM_SUPPORTS_TFU, + DRM_V3D_PARAM_SUPPORTS_CSD, + DRM_V3D_PARAM_SUPPORTS_CACHE_FLUSH, + DRM_V3D_PARAM_SUPPORTS_PERFMON, + DRM_V3D_PARAM_SUPPORTS_MULTISYNC_EXT, + DRM_V3D_PARAM_SUPPORTS_CPU_QUEUE, + DRM_V3D_PARAM_MAX_PERF_COUNTERS, + DRM_V3D_PARAM_SUPPORTS_SUPER_PAGES, +}; + +struct drm_v3d_get_param { + __u32 param; + __u32 pad; + __u64 value; +}; + +/** + * Returns the offset for the BO in the V3D address space for this DRM fd. + * This is the same value returned by drm_v3d_create_bo, if that was called + * from this DRM fd. + */ +struct drm_v3d_get_bo_offset { + __u32 handle; + __u32 offset; +}; + +struct drm_v3d_submit_tfu { + __u32 icfg; + __u32 iia; + __u32 iis; + __u32 ica; + __u32 iua; + __u32 ioa; + __u32 ios; + __u32 coef[4]; + /* First handle is the output BO, following are other inputs. + * 0 for unused. + */ + __u32 bo_handles[4]; + /* sync object to block on before running the TFU job. Each TFU + * job will execute in the order submitted to its FD. Synchronization + * against rendering jobs requires using sync objects. + */ + __u32 in_sync; + /* Sync object to signal when the TFU job is done. */ + __u32 out_sync; + + __u32 flags; + + /* Pointer to an array of ioctl extensions*/ + __u64 extensions; + + struct { + __u32 ioc; + __u32 pad; + } v71; +}; + +/* Submits a compute shader for dispatch. This job will block on any + * previous compute shaders submitted on this fd, and any other + * synchronization must be performed with in_sync/out_sync. + */ +struct drm_v3d_submit_csd { + __u32 cfg[7]; + __u32 coef[4]; + + /* Pointer to a u32 array of the BOs that are referenced by the job. + */ + __u64 bo_handles; + + /* Number of BO handles passed in (size is that times 4). */ + __u32 bo_handle_count; + + /* sync object to block on before running the CSD job. Each + * CSD job will execute in the order submitted to its FD. + * Synchronization against rendering/TFU jobs or CSD from + * other fds requires using sync objects. + */ + __u32 in_sync; + /* Sync object to signal when the CSD job is done. */ + __u32 out_sync; + + /* ID of the perfmon to attach to this job. 0 means no perfmon. */ + __u32 perfmon_id; + + /* Pointer to an array of ioctl extensions*/ + __u64 extensions; + + __u32 flags; + + __u32 pad; +}; + +/** + * struct drm_v3d_indirect_csd - ioctl extension for the CPU job to create an + * indirect CSD + * + * When an extension of DRM_V3D_EXT_ID_CPU_INDIRECT_CSD id is defined, it + * points to this extension to define a indirect CSD submission. It creates a + * CPU job linked to a CSD job. The CPU job waits for the indirect CSD + * dependencies and, once they are signaled, it updates the CSD job config + * before allowing the CSD job execution. + */ +struct drm_v3d_indirect_csd { + struct drm_v3d_extension base; + + /* Indirect CSD */ + struct drm_v3d_submit_csd submit; + + /* Handle of the indirect BO, that should be also attached to the + * indirect CSD. + */ + __u32 indirect; + + /* Offset within the BO where the workgroup counts are stored */ + __u32 offset; + + /* Workgroups size */ + __u32 wg_size; + + /* Indices of the uniforms with the workgroup dispatch counts + * in the uniform stream. If the uniform rewrite is not needed, + * the offset must be 0xffffffff. + */ + __u32 wg_uniform_offsets[3]; +}; + +/** + * struct drm_v3d_timestamp_query - ioctl extension for the CPU job to calculate + * a timestamp query + * + * When an extension DRM_V3D_EXT_ID_TIMESTAMP_QUERY is defined, it points to + * this extension to define a timestamp query submission. This CPU job will + * calculate the timestamp query and update the query value within the + * timestamp BO. Moreover, it will signal the timestamp syncobj to indicate + * query availability. + */ +struct drm_v3d_timestamp_query { + struct drm_v3d_extension base; + + /* Array of queries' offsets within the timestamp BO for their value */ + __u64 offsets; + + /* Array of timestamp's syncobjs to indicate its availability */ + __u64 syncs; + + /* Number of queries */ + __u32 count; + + /* mbz */ + __u32 pad; +}; + +/** + * struct drm_v3d_reset_timestamp_query - ioctl extension for the CPU job to + * reset timestamp queries + * + * When an extension DRM_V3D_EXT_ID_CPU_RESET_TIMESTAMP_QUERY is defined, it + * points to this extension to define a reset timestamp submission. This CPU + * job will reset the timestamp queries based on value offset of the first + * query. Moreover, it will reset the timestamp syncobj to reset query + * availability. + */ +struct drm_v3d_reset_timestamp_query { + struct drm_v3d_extension base; + + /* Array of timestamp's syncobjs to indicate its availability */ + __u64 syncs; + + /* Offset of the first query within the timestamp BO for its value */ + __u32 offset; + + /* Number of queries */ + __u32 count; +}; + +/** + * struct drm_v3d_copy_timestamp_query - ioctl extension for the CPU job to copy + * query results to a buffer + * + * When an extension DRM_V3D_EXT_ID_CPU_COPY_TIMESTAMP_QUERY is defined, it + * points to this extension to define a copy timestamp query submission. This + * CPU job will copy the timestamp queries results to a BO with the offset + * and stride defined in the extension. + */ +struct drm_v3d_copy_timestamp_query { + struct drm_v3d_extension base; + + /* Define if should write to buffer using 64 or 32 bits */ + __u8 do_64bit; + + /* Define if it can write to buffer even if the query is not available + */ + __u8 do_partial; + + /* Define if it should write availability bit to buffer */ + __u8 availability_bit; + + /* mbz */ + __u8 pad; + + /* Offset of the buffer in the BO */ + __u32 offset; + + /* Stride of the buffer in the BO */ + __u32 stride; + + /* Number of queries */ + __u32 count; + + /* Array of queries' offsets within the timestamp BO for their value */ + __u64 offsets; + + /* Array of timestamp's syncobjs to indicate its availability */ + __u64 syncs; +}; + +/** + * struct drm_v3d_reset_performance_query - ioctl extension for the CPU job to + * reset performance queries + * + * When an extension DRM_V3D_EXT_ID_CPU_RESET_PERFORMANCE_QUERY is defined, it + * points to this extension to define a reset performance submission. This CPU + * job will reset the performance queries by resetting the values of the + * performance monitors. Moreover, it will reset the syncobj to reset query + * availability. + */ +struct drm_v3d_reset_performance_query { + struct drm_v3d_extension base; + + /* Array of performance queries's syncobjs to indicate its availability + */ + __u64 syncs; + + /* Number of queries */ + __u32 count; + + /* Number of performance monitors */ + __u32 nperfmons; + + /* Array of u64 user-pointers that point to an array of kperfmon_ids */ + __u64 kperfmon_ids; +}; + +/** + * struct drm_v3d_copy_performance_query - ioctl extension for the CPU job to + * copy performance query results to a buffer + * + * When an extension DRM_V3D_EXT_ID_CPU_COPY_PERFORMANCE_QUERY is defined, it + * points to this extension to define a copy performance query submission. This + * CPU job will copy the performance queries results to a BO with the offset + * and stride defined in the extension. + */ +struct drm_v3d_copy_performance_query { + struct drm_v3d_extension base; + + /* Define if should write to buffer using 64 or 32 bits */ + __u8 do_64bit; + + /* Define if it can write to buffer even if the query is not available + */ + __u8 do_partial; + + /* Define if it should write availability bit to buffer */ + __u8 availability_bit; + + /* mbz */ + __u8 pad; + + /* Offset of the buffer in the BO */ + __u32 offset; + + /* Stride of the buffer in the BO */ + __u32 stride; + + /* Number of performance monitors */ + __u32 nperfmons; + + /* Number of performance counters related to this query pool */ + __u32 ncounters; + + /* Number of queries */ + __u32 count; + + /* Array of performance queries's syncobjs to indicate its availability + */ + __u64 syncs; + + /* Array of u64 user-pointers that point to an array of kperfmon_ids */ + __u64 kperfmon_ids; +}; + +struct drm_v3d_submit_cpu { + /* Pointer to a u32 array of the BOs that are referenced by the job. + * + * For DRM_V3D_EXT_ID_CPU_INDIRECT_CSD, it must contain only one BO, + * that contains the workgroup counts. + * + * For DRM_V3D_EXT_ID_TIMESTAMP_QUERY, it must contain only one BO, + * that will contain the timestamp. + * + * For DRM_V3D_EXT_ID_CPU_RESET_TIMESTAMP_QUERY, it must contain only + * one BO, that contains the timestamp. + * + * For DRM_V3D_EXT_ID_CPU_COPY_TIMESTAMP_QUERY, it must contain two + * BOs. The first is the BO where the timestamp queries will be written + * to. The second is the BO that contains the timestamp. + * + * For DRM_V3D_EXT_ID_CPU_RESET_PERFORMANCE_QUERY, it must contain no + * BOs. + * + * For DRM_V3D_EXT_ID_CPU_COPY_PERFORMANCE_QUERY, it must contain one + * BO, where the performance queries will be written. + */ + __u64 bo_handles; + + /* Number of BO handles passed in (size is that times 4). */ + __u32 bo_handle_count; + + __u32 flags; + + /* Pointer to an array of ioctl extensions*/ + __u64 extensions; +}; + +/* The performance counters index represented by this enum are deprecated and + * must no longer be used. These counters are only valid for V3D 4.2. + * + * In order to check for performance counter information, + * use DRM_IOCTL_V3D_PERFMON_GET_COUNTER. + * + * Don't use V3D_PERFCNT_NUM to retrieve the maximum number of performance + * counters. You should use DRM_IOCTL_V3D_GET_PARAM with the following + * parameter: DRM_V3D_PARAM_MAX_PERF_COUNTERS. + */ +enum { + V3D_PERFCNT_FEP_VALID_PRIMTS_NO_PIXELS, + V3D_PERFCNT_FEP_VALID_PRIMS, + V3D_PERFCNT_FEP_EZ_NFCLIP_QUADS, + V3D_PERFCNT_FEP_VALID_QUADS, + V3D_PERFCNT_TLB_QUADS_STENCIL_FAIL, + V3D_PERFCNT_TLB_QUADS_STENCILZ_FAIL, + V3D_PERFCNT_TLB_QUADS_STENCILZ_PASS, + V3D_PERFCNT_TLB_QUADS_ZERO_COV, + V3D_PERFCNT_TLB_QUADS_NONZERO_COV, + V3D_PERFCNT_TLB_QUADS_WRITTEN, + V3D_PERFCNT_PTB_PRIM_VIEWPOINT_DISCARD, + V3D_PERFCNT_PTB_PRIM_CLIP, + V3D_PERFCNT_PTB_PRIM_REV, + V3D_PERFCNT_QPU_IDLE_CYCLES, + V3D_PERFCNT_QPU_ACTIVE_CYCLES_VERTEX_COORD_USER, + V3D_PERFCNT_QPU_ACTIVE_CYCLES_FRAG, + V3D_PERFCNT_QPU_CYCLES_VALID_INSTR, + V3D_PERFCNT_QPU_CYCLES_TMU_STALL, + V3D_PERFCNT_QPU_CYCLES_SCOREBOARD_STALL, + V3D_PERFCNT_QPU_CYCLES_VARYINGS_STALL, + V3D_PERFCNT_QPU_IC_HIT, + V3D_PERFCNT_QPU_IC_MISS, + V3D_PERFCNT_QPU_UC_HIT, + V3D_PERFCNT_QPU_UC_MISS, + V3D_PERFCNT_TMU_TCACHE_ACCESS, + V3D_PERFCNT_TMU_TCACHE_MISS, + V3D_PERFCNT_VPM_VDW_STALL, + V3D_PERFCNT_VPM_VCD_STALL, + V3D_PERFCNT_BIN_ACTIVE, + V3D_PERFCNT_RDR_ACTIVE, + V3D_PERFCNT_L2T_HITS, + V3D_PERFCNT_L2T_MISSES, + V3D_PERFCNT_CYCLE_COUNT, + V3D_PERFCNT_QPU_CYCLES_STALLED_VERTEX_COORD_USER, + V3D_PERFCNT_QPU_CYCLES_STALLED_FRAGMENT, + V3D_PERFCNT_PTB_PRIMS_BINNED, + V3D_PERFCNT_AXI_WRITES_WATCH_0, + V3D_PERFCNT_AXI_READS_WATCH_0, + V3D_PERFCNT_AXI_WRITE_STALLS_WATCH_0, + V3D_PERFCNT_AXI_READ_STALLS_WATCH_0, + V3D_PERFCNT_AXI_WRITE_BYTES_WATCH_0, + V3D_PERFCNT_AXI_READ_BYTES_WATCH_0, + V3D_PERFCNT_AXI_WRITES_WATCH_1, + V3D_PERFCNT_AXI_READS_WATCH_1, + V3D_PERFCNT_AXI_WRITE_STALLS_WATCH_1, + V3D_PERFCNT_AXI_READ_STALLS_WATCH_1, + V3D_PERFCNT_AXI_WRITE_BYTES_WATCH_1, + V3D_PERFCNT_AXI_READ_BYTES_WATCH_1, + V3D_PERFCNT_TLB_PARTIAL_QUADS, + V3D_PERFCNT_TMU_CONFIG_ACCESSES, + V3D_PERFCNT_L2T_NO_ID_STALL, + V3D_PERFCNT_L2T_COM_QUE_STALL, + V3D_PERFCNT_L2T_TMU_WRITES, + V3D_PERFCNT_TMU_ACTIVE_CYCLES, + V3D_PERFCNT_TMU_STALLED_CYCLES, + V3D_PERFCNT_CLE_ACTIVE, + V3D_PERFCNT_L2T_TMU_READS, + V3D_PERFCNT_L2T_CLE_READS, + V3D_PERFCNT_L2T_VCD_READS, + V3D_PERFCNT_L2T_TMUCFG_READS, + V3D_PERFCNT_L2T_SLC0_READS, + V3D_PERFCNT_L2T_SLC1_READS, + V3D_PERFCNT_L2T_SLC2_READS, + V3D_PERFCNT_L2T_TMU_W_MISSES, + V3D_PERFCNT_L2T_TMU_R_MISSES, + V3D_PERFCNT_L2T_CLE_MISSES, + V3D_PERFCNT_L2T_VCD_MISSES, + V3D_PERFCNT_L2T_TMUCFG_MISSES, + V3D_PERFCNT_L2T_SLC0_MISSES, + V3D_PERFCNT_L2T_SLC1_MISSES, + V3D_PERFCNT_L2T_SLC2_MISSES, + V3D_PERFCNT_CORE_MEM_WRITES, + V3D_PERFCNT_L2T_MEM_WRITES, + V3D_PERFCNT_PTB_MEM_WRITES, + V3D_PERFCNT_TLB_MEM_WRITES, + V3D_PERFCNT_CORE_MEM_READS, + V3D_PERFCNT_L2T_MEM_READS, + V3D_PERFCNT_PTB_MEM_READS, + V3D_PERFCNT_PSE_MEM_READS, + V3D_PERFCNT_TLB_MEM_READS, + V3D_PERFCNT_GMP_MEM_READS, + V3D_PERFCNT_PTB_W_MEM_WORDS, + V3D_PERFCNT_TLB_W_MEM_WORDS, + V3D_PERFCNT_PSE_R_MEM_WORDS, + V3D_PERFCNT_TLB_R_MEM_WORDS, + V3D_PERFCNT_TMU_MRU_HITS, + V3D_PERFCNT_COMPUTE_ACTIVE, + V3D_PERFCNT_NUM, +}; + +#define DRM_V3D_MAX_PERF_COUNTERS 32 + +struct drm_v3d_perfmon_create { + __u32 id; + __u32 ncounters; + __u8 counters[DRM_V3D_MAX_PERF_COUNTERS]; +}; + +struct drm_v3d_perfmon_destroy { + __u32 id; +}; + +/* + * Returns the values of the performance counters tracked by this + * perfmon (as an array of ncounters u64 values). + * + * No implicit synchronization is performed, so the user has to + * guarantee that any jobs using this perfmon have already been + * completed (probably by blocking on the seqno returned by the + * last exec that used the perfmon). + */ +struct drm_v3d_perfmon_get_values { + __u32 id; + __u32 pad; + __u64 values_ptr; +}; + +#define DRM_V3D_PERFCNT_MAX_NAME 64 +#define DRM_V3D_PERFCNT_MAX_CATEGORY 32 +#define DRM_V3D_PERFCNT_MAX_DESCRIPTION 256 + +/** + * struct drm_v3d_perfmon_get_counter - ioctl to get the description of a + * performance counter + * + * As userspace needs to retrieve information about the performance counters + * available, this IOCTL allows users to get information about a performance + * counter (name, category and description). + */ +struct drm_v3d_perfmon_get_counter { + /* + * Counter ID + * + * Must be smaller than the maximum number of performance counters, + * which can be retrieve through DRM_V3D_PARAM_MAX_PERF_COUNTERS. + */ + __u8 counter; + + /* Name of the counter */ + __u8 name[DRM_V3D_PERFCNT_MAX_NAME]; + + /* Category of the counter */ + __u8 category[DRM_V3D_PERFCNT_MAX_CATEGORY]; + + /* Description of the counter */ + __u8 description[DRM_V3D_PERFCNT_MAX_DESCRIPTION]; + + /* mbz */ + __u8 reserved[7]; +}; + +#define DRM_V3D_PERFMON_CLEAR_GLOBAL 0x0001 + +/** + * struct drm_v3d_perfmon_set_global - ioctl to define a global performance + * monitor + * + * The global performance monitor will be used for all jobs. If a global + * performance monitor is defined, jobs with a self-defined performance + * monitor won't be allowed. + */ +struct drm_v3d_perfmon_set_global { + __u32 flags; + __u32 id; +}; + +#if defined(__cplusplus) +} +#endif + +#endif /* _V3D_DRM_H_ */ diff --git a/include/arch/x86_64/drm/vc4_drm.h b/include/arch/x86_64/drm/vc4_drm.h new file mode 100644 index 00000000..691fa998 --- /dev/null +++ b/include/arch/x86_64/drm/vc4_drm.h @@ -0,0 +1,466 @@ +/* + * Copyright © 2014-2015 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef _VC4_DRM_H_ +#define _VC4_DRM_H_ + +#include "drm.h" + +#if defined(__cplusplus) +extern "C" { +#endif + +#define DRM_VC4_SUBMIT_CL 0x00 +#define DRM_VC4_WAIT_SEQNO 0x01 +#define DRM_VC4_WAIT_BO 0x02 +#define DRM_VC4_CREATE_BO 0x03 +#define DRM_VC4_MMAP_BO 0x04 +#define DRM_VC4_CREATE_SHADER_BO 0x05 +#define DRM_VC4_GET_HANG_STATE 0x06 +#define DRM_VC4_GET_PARAM 0x07 +#define DRM_VC4_SET_TILING 0x08 +#define DRM_VC4_GET_TILING 0x09 +#define DRM_VC4_LABEL_BO 0x0a +#define DRM_VC4_GEM_MADVISE 0x0b +#define DRM_VC4_PERFMON_CREATE 0x0c +#define DRM_VC4_PERFMON_DESTROY 0x0d +#define DRM_VC4_PERFMON_GET_VALUES 0x0e + +#define DRM_IOCTL_VC4_SUBMIT_CL \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_SUBMIT_CL, struct drm_vc4_submit_cl) +#define DRM_IOCTL_VC4_WAIT_SEQNO \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_WAIT_SEQNO, \ + struct drm_vc4_wait_seqno) +#define DRM_IOCTL_VC4_WAIT_BO \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_WAIT_BO, struct drm_vc4_wait_bo) +#define DRM_IOCTL_VC4_CREATE_BO \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_CREATE_BO, struct drm_vc4_create_bo) +#define DRM_IOCTL_VC4_MMAP_BO \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_MMAP_BO, struct drm_vc4_mmap_bo) +#define DRM_IOCTL_VC4_CREATE_SHADER_BO \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_CREATE_SHADER_BO, \ + struct drm_vc4_create_shader_bo) +#define DRM_IOCTL_VC4_GET_HANG_STATE \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_GET_HANG_STATE, \ + struct drm_vc4_get_hang_state) +#define DRM_IOCTL_VC4_GET_PARAM \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_GET_PARAM, struct drm_vc4_get_param) +#define DRM_IOCTL_VC4_SET_TILING \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_SET_TILING, \ + struct drm_vc4_set_tiling) +#define DRM_IOCTL_VC4_GET_TILING \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_GET_TILING, \ + struct drm_vc4_get_tiling) +#define DRM_IOCTL_VC4_LABEL_BO \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_LABEL_BO, struct drm_vc4_label_bo) +#define DRM_IOCTL_VC4_GEM_MADVISE \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_GEM_MADVISE, \ + struct drm_vc4_gem_madvise) +#define DRM_IOCTL_VC4_PERFMON_CREATE \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_PERFMON_CREATE, \ + struct drm_vc4_perfmon_create) +#define DRM_IOCTL_VC4_PERFMON_DESTROY \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_PERFMON_DESTROY, \ + struct drm_vc4_perfmon_destroy) +#define DRM_IOCTL_VC4_PERFMON_GET_VALUES \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_PERFMON_GET_VALUES, \ + struct drm_vc4_perfmon_get_values) + +struct drm_vc4_submit_rcl_surface { + __u32 hindex; /* Handle index, or ~0 if not present. */ + __u32 offset; /* Offset to start of buffer. */ + /* + * Bits for either render config (color_write) or load/store packet. + * Bits should all be 0 for MSAA load/stores. + */ + __u16 bits; + +#define VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES (1 << 0) + __u16 flags; +}; + +/** + * struct drm_vc4_submit_cl - ioctl argument for submitting commands to the 3D + * engine. + * + * Drivers typically use GPU BOs to store batchbuffers / command lists and + * their associated state. However, because the VC4 lacks an MMU, we have to + * do validation of memory accesses by the GPU commands. If we were to store + * our commands in BOs, we'd need to do uncached readback from them to do the + * validation process, which is too expensive. Instead, userspace accumulates + * commands and associated state in plain memory, then the kernel copies the + * data to its own address space, and then validates and stores it in a GPU + * BO. + */ +struct drm_vc4_submit_cl { + /* Pointer to the binner command list. + * + * This is the first set of commands executed, which runs the + * coordinate shader to determine where primitives land on the screen, + * then writes out the state updates and draw calls necessary per tile + * to the tile allocation BO. + */ + __u64 bin_cl; + + /* Pointer to the shader records. + * + * Shader records are the structures read by the hardware that contain + * pointers to uniforms, shaders, and vertex attributes. The + * reference to the shader record has enough information to determine + * how many pointers are necessary (fixed number for shaders/uniforms, + * and an attribute count), so those BO indices into bo_handles are + * just stored as __u32s before each shader record passed in. + */ + __u64 shader_rec; + + /* Pointer to uniform data and texture handles for the textures + * referenced by the shader. + * + * For each shader state record, there is a set of uniform data in the + * order referenced by the record (FS, VS, then CS). Each set of + * uniform data has a __u32 index into bo_handles per texture + * sample operation, in the order the QPU_W_TMUn_S writes appear in + * the program. Following the texture BO handle indices is the actual + * uniform data. + * + * The individual uniform state blocks don't have sizes passed in, + * because the kernel has to determine the sizes anyway during shader + * code validation. + */ + __u64 uniforms; + __u64 bo_handles; + + /* Size in bytes of the binner command list. */ + __u32 bin_cl_size; + /* Size in bytes of the set of shader records. */ + __u32 shader_rec_size; + /* Number of shader records. + * + * This could just be computed from the contents of shader_records and + * the address bits of references to them from the bin CL, but it + * keeps the kernel from having to resize some allocations it makes. + */ + __u32 shader_rec_count; + /* Size in bytes of the uniform state. */ + __u32 uniforms_size; + + /* Number of BO handles passed in (size is that times 4). */ + __u32 bo_handle_count; + + /* RCL setup: */ + __u16 width; + __u16 height; + __u8 min_x_tile; + __u8 min_y_tile; + __u8 max_x_tile; + __u8 max_y_tile; + struct drm_vc4_submit_rcl_surface color_read; + struct drm_vc4_submit_rcl_surface color_write; + struct drm_vc4_submit_rcl_surface zs_read; + struct drm_vc4_submit_rcl_surface zs_write; + struct drm_vc4_submit_rcl_surface msaa_color_write; + struct drm_vc4_submit_rcl_surface msaa_zs_write; + __u32 clear_color[2]; + __u32 clear_z; + __u8 clear_s; + + __u32 pad : 24; + +#define VC4_SUBMIT_CL_USE_CLEAR_COLOR (1 << 0) +/* By default, the kernel gets to choose the order that the tiles are + * rendered in. If this is set, then the tiles will be rendered in a + * raster order, with the right-to-left vs left-to-right and + * top-to-bottom vs bottom-to-top dictated by + * VC4_SUBMIT_CL_RCL_ORDER_INCREASING_*. This allows overlapping + * blits to be implemented using the 3D engine. + */ +#define VC4_SUBMIT_CL_FIXED_RCL_ORDER (1 << 1) +#define VC4_SUBMIT_CL_RCL_ORDER_INCREASING_X (1 << 2) +#define VC4_SUBMIT_CL_RCL_ORDER_INCREASING_Y (1 << 3) + __u32 flags; + + /* Returned value of the seqno of this render job (for the + * wait ioctl). + */ + __u64 seqno; + + /* ID of the perfmon to attach to this job. 0 means no perfmon. */ + __u32 perfmonid; + + /* Syncobj handle to wait on. If set, processing of this render job + * will not start until the syncobj is signaled. 0 means ignore. + */ + __u32 in_sync; + + /* Syncobj handle to export fence to. If set, the fence in the syncobj + * will be replaced with a fence that signals upon completion of this + * render job. 0 means ignore. + */ + __u32 out_sync; + + __u32 pad2; +}; + +/** + * struct drm_vc4_wait_seqno - ioctl argument for waiting for + * DRM_VC4_SUBMIT_CL completion using its returned seqno. + * + * timeout_ns is the timeout in nanoseconds, where "0" means "don't + * block, just return the status." + */ +struct drm_vc4_wait_seqno { + __u64 seqno; + __u64 timeout_ns; +}; + +/** + * struct drm_vc4_wait_bo - ioctl argument for waiting for + * completion of the last DRM_VC4_SUBMIT_CL on a BO. + * + * This is useful for cases where multiple processes might be + * rendering to a BO and you want to wait for all rendering to be + * completed. + */ +struct drm_vc4_wait_bo { + __u32 handle; + __u32 pad; + __u64 timeout_ns; +}; + +/** + * struct drm_vc4_create_bo - ioctl argument for creating VC4 BOs. + * + * There are currently no values for the flags argument, but it may be + * used in a future extension. + */ +struct drm_vc4_create_bo { + __u32 size; + __u32 flags; + /** Returned GEM handle for the BO. */ + __u32 handle; + __u32 pad; +}; + +/** + * struct drm_vc4_mmap_bo - ioctl argument for mapping VC4 BOs. + * + * This doesn't actually perform an mmap. Instead, it returns the + * offset you need to use in an mmap on the DRM device node. This + * means that tools like valgrind end up knowing about the mapped + * memory. + * + * There are currently no values for the flags argument, but it may be + * used in a future extension. + */ +struct drm_vc4_mmap_bo { + /** Handle for the object being mapped. */ + __u32 handle; + __u32 flags; + /** offset into the drm node to use for subsequent mmap call. */ + __u64 offset; +}; + +/** + * struct drm_vc4_create_shader_bo - ioctl argument for creating VC4 + * shader BOs. + * + * Since allowing a shader to be overwritten while it's also being + * executed from would allow privlege escalation, shaders must be + * created using this ioctl, and they can't be mmapped later. + */ +struct drm_vc4_create_shader_bo { + /* Size of the data argument. */ + __u32 size; + /* Flags, currently must be 0. */ + __u32 flags; + + /* Pointer to the data. */ + __u64 data; + + /** Returned GEM handle for the BO. */ + __u32 handle; + /* Pad, must be 0. */ + __u32 pad; +}; + +struct drm_vc4_get_hang_state_bo { + __u32 handle; + __u32 paddr; + __u32 size; + __u32 pad; +}; + +/** + * struct drm_vc4_hang_state - ioctl argument for collecting state + * from a GPU hang for analysis. + */ +struct drm_vc4_get_hang_state { + /** Pointer to array of struct drm_vc4_get_hang_state_bo. */ + __u64 bo; + /** + * On input, the size of the bo array. Output is the number + * of bos to be returned. + */ + __u32 bo_count; + + __u32 start_bin, start_render; + + __u32 ct0ca, ct0ea; + __u32 ct1ca, ct1ea; + __u32 ct0cs, ct1cs; + __u32 ct0ra0, ct1ra0; + + __u32 bpca, bpcs; + __u32 bpoa, bpos; + + __u32 vpmbase; + + __u32 dbge; + __u32 fdbgo; + __u32 fdbgb; + __u32 fdbgr; + __u32 fdbgs; + __u32 errstat; + + /* Pad that we may save more registers into in the future. */ + __u32 pad[16]; +}; + +#define DRM_VC4_PARAM_V3D_IDENT0 0 +#define DRM_VC4_PARAM_V3D_IDENT1 1 +#define DRM_VC4_PARAM_V3D_IDENT2 2 +#define DRM_VC4_PARAM_SUPPORTS_BRANCHES 3 +#define DRM_VC4_PARAM_SUPPORTS_ETC1 4 +#define DRM_VC4_PARAM_SUPPORTS_THREADED_FS 5 +#define DRM_VC4_PARAM_SUPPORTS_FIXED_RCL_ORDER 6 +#define DRM_VC4_PARAM_SUPPORTS_MADVISE 7 +#define DRM_VC4_PARAM_SUPPORTS_PERFMON 8 + +struct drm_vc4_get_param { + __u32 param; + __u32 pad; + __u64 value; +}; + +struct drm_vc4_get_tiling { + __u32 handle; + __u32 flags; + __u64 modifier; +}; + +struct drm_vc4_set_tiling { + __u32 handle; + __u32 flags; + __u64 modifier; +}; + +/** + * struct drm_vc4_label_bo - Attach a name to a BO for debug purposes. + */ +struct drm_vc4_label_bo { + __u32 handle; + __u32 len; + __u64 name; +}; + +/* + * States prefixed with '__' are internal states and cannot be passed to the + * DRM_IOCTL_VC4_GEM_MADVISE ioctl. + */ +#define VC4_MADV_WILLNEED 0 +#define VC4_MADV_DONTNEED 1 +#define __VC4_MADV_PURGED 2 +#define __VC4_MADV_NOTSUPP 3 + +struct drm_vc4_gem_madvise { + __u32 handle; + __u32 madv; + __u32 retained; + __u32 pad; +}; + +enum { + VC4_PERFCNT_FEP_VALID_PRIMS_NO_RENDER, + VC4_PERFCNT_FEP_VALID_PRIMS_RENDER, + VC4_PERFCNT_FEP_CLIPPED_QUADS, + VC4_PERFCNT_FEP_VALID_QUADS, + VC4_PERFCNT_TLB_QUADS_NOT_PASSING_STENCIL, + VC4_PERFCNT_TLB_QUADS_NOT_PASSING_Z_AND_STENCIL, + VC4_PERFCNT_TLB_QUADS_PASSING_Z_AND_STENCIL, + VC4_PERFCNT_TLB_QUADS_ZERO_COVERAGE, + VC4_PERFCNT_TLB_QUADS_NON_ZERO_COVERAGE, + VC4_PERFCNT_TLB_QUADS_WRITTEN_TO_COLOR_BUF, + VC4_PERFCNT_PLB_PRIMS_OUTSIDE_VIEWPORT, + VC4_PERFCNT_PLB_PRIMS_NEED_CLIPPING, + VC4_PERFCNT_PSE_PRIMS_REVERSED, + VC4_PERFCNT_QPU_TOTAL_IDLE_CYCLES, + VC4_PERFCNT_QPU_TOTAL_CLK_CYCLES_VERTEX_COORD_SHADING, + VC4_PERFCNT_QPU_TOTAL_CLK_CYCLES_FRAGMENT_SHADING, + VC4_PERFCNT_QPU_TOTAL_CLK_CYCLES_EXEC_VALID_INST, + VC4_PERFCNT_QPU_TOTAL_CLK_CYCLES_WAITING_TMUS, + VC4_PERFCNT_QPU_TOTAL_CLK_CYCLES_WAITING_SCOREBOARD, + VC4_PERFCNT_QPU_TOTAL_CLK_CYCLES_WAITING_VARYINGS, + VC4_PERFCNT_QPU_TOTAL_INST_CACHE_HIT, + VC4_PERFCNT_QPU_TOTAL_INST_CACHE_MISS, + VC4_PERFCNT_QPU_TOTAL_UNIFORM_CACHE_HIT, + VC4_PERFCNT_QPU_TOTAL_UNIFORM_CACHE_MISS, + VC4_PERFCNT_TMU_TOTAL_TEXT_QUADS_PROCESSED, + VC4_PERFCNT_TMU_TOTAL_TEXT_CACHE_MISS, + VC4_PERFCNT_VPM_TOTAL_CLK_CYCLES_VDW_STALLED, + VC4_PERFCNT_VPM_TOTAL_CLK_CYCLES_VCD_STALLED, + VC4_PERFCNT_L2C_TOTAL_L2_CACHE_HIT, + VC4_PERFCNT_L2C_TOTAL_L2_CACHE_MISS, + VC4_PERFCNT_NUM_EVENTS, +}; + +#define DRM_VC4_MAX_PERF_COUNTERS 16 + +struct drm_vc4_perfmon_create { + __u32 id; + __u32 ncounters; + __u8 events[DRM_VC4_MAX_PERF_COUNTERS]; +}; + +struct drm_vc4_perfmon_destroy { + __u32 id; +}; + +/* + * Returns the values of the performance counters tracked by this + * perfmon (as an array of ncounters u64 values). + * + * No implicit synchronization is performed, so the user has to + * guarantee that any jobs using this perfmon have already been + * completed (probably by blocking on the seqno returned by the + * last exec that used the perfmon). + */ +struct drm_vc4_perfmon_get_values { + __u32 id; + __u64 values_ptr; +}; + +#if defined(__cplusplus) +} +#endif + +#endif /* _VC4_DRM_H_ */ diff --git a/include/arch/x86_64/drm/vgem_drm.h b/include/arch/x86_64/drm/vgem_drm.h new file mode 100644 index 00000000..b2d8eed6 --- /dev/null +++ b/include/arch/x86_64/drm/vgem_drm.h @@ -0,0 +1,66 @@ +/* + * Copyright 2016 Intel Corporation + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef _VGEM_DRM_H_ +#define _VGEM_DRM_H_ + +#include "drm.h" + +#if defined(__cplusplus) +extern "C" { +#endif + +/* Please note that modifications to all structs defined here are + * subject to backwards-compatibility constraints. + */ +#define DRM_VGEM_FENCE_ATTACH 0x1 +#define DRM_VGEM_FENCE_SIGNAL 0x2 + +#define DRM_IOCTL_VGEM_FENCE_ATTACH \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_VGEM_FENCE_ATTACH, \ + struct drm_vgem_fence_attach) +#define DRM_IOCTL_VGEM_FENCE_SIGNAL \ + DRM_IOW(DRM_COMMAND_BASE + DRM_VGEM_FENCE_SIGNAL, \ + struct drm_vgem_fence_signal) + +struct drm_vgem_fence_attach { + __u32 handle; + __u32 flags; +#define VGEM_FENCE_WRITE 0x1 + __u32 out_fence; + __u32 pad; +}; + +struct drm_vgem_fence_signal { + __u32 fence; + __u32 flags; +}; + +#if defined(__cplusplus) +} +#endif + +#endif /* _VGEM_DRM_H_ */ diff --git a/include/arch/x86_64/drm/virtgpu_drm.h b/include/arch/x86_64/drm/virtgpu_drm.h new file mode 100644 index 00000000..4f9458d6 --- /dev/null +++ b/include/arch/x86_64/drm/virtgpu_drm.h @@ -0,0 +1,278 @@ +/* + * Copyright 2013 Red Hat + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef VIRTGPU_DRM_H +#define VIRTGPU_DRM_H + +#include "drm.h" + +#if defined(__cplusplus) +extern "C" { +#endif + +/* Please note that modifications to all structs defined here are + * subject to backwards-compatibility constraints. + * + * Do not use pointers, use __u64 instead for 32 bit / 64 bit user/kernel + * compatibility Keep fields aligned to their size + */ + +#define DRM_VIRTGPU_MAP 0x01 +#define DRM_VIRTGPU_EXECBUFFER 0x02 +#define DRM_VIRTGPU_GETPARAM 0x03 +#define DRM_VIRTGPU_RESOURCE_CREATE 0x04 +#define DRM_VIRTGPU_RESOURCE_INFO 0x05 +#define DRM_VIRTGPU_TRANSFER_FROM_HOST 0x06 +#define DRM_VIRTGPU_TRANSFER_TO_HOST 0x07 +#define DRM_VIRTGPU_WAIT 0x08 +#define DRM_VIRTGPU_GET_CAPS 0x09 +#define DRM_VIRTGPU_RESOURCE_CREATE_BLOB 0x0a +#define DRM_VIRTGPU_CONTEXT_INIT 0x0b + +#define VIRTGPU_EXECBUF_FENCE_FD_IN 0x01 +#define VIRTGPU_EXECBUF_FENCE_FD_OUT 0x02 +#define VIRTGPU_EXECBUF_RING_IDX 0x04 +#define VIRTGPU_EXECBUF_FLAGS \ + (VIRTGPU_EXECBUF_FENCE_FD_IN | VIRTGPU_EXECBUF_FENCE_FD_OUT | \ + VIRTGPU_EXECBUF_RING_IDX | 0) + +struct drm_virtgpu_map { + __u64 offset; /* use for mmap system call */ + __u32 handle; + __u32 pad; +}; + +#define VIRTGPU_EXECBUF_SYNCOBJ_RESET 0x01 +#define VIRTGPU_EXECBUF_SYNCOBJ_FLAGS (VIRTGPU_EXECBUF_SYNCOBJ_RESET | 0) +struct drm_virtgpu_execbuffer_syncobj { + __u32 handle; + __u32 flags; + __u64 point; +}; + +/* fence_fd is modified on success if VIRTGPU_EXECBUF_FENCE_FD_OUT flag is set. + */ +struct drm_virtgpu_execbuffer { + __u32 flags; + __u32 size; + __u64 command; /* void* */ + __u64 bo_handles; + __u32 num_bo_handles; + __s32 fence_fd; /* in/out fence fd (see VIRTGPU_EXECBUF_FENCE_FD_IN/OUT) + */ + __u32 ring_idx; /* command ring index (see VIRTGPU_EXECBUF_RING_IDX) */ + __u32 syncobj_stride; /* size of @drm_virtgpu_execbuffer_syncobj */ + __u32 num_in_syncobjs; + __u32 num_out_syncobjs; + __u64 in_syncobjs; + __u64 out_syncobjs; +}; + +#define VIRTGPU_PARAM_3D_FEATURES 1 /* do we have 3D features in the hw */ +#define VIRTGPU_PARAM_CAPSET_QUERY_FIX 2 /* do we have the capset fix */ +#define VIRTGPU_PARAM_RESOURCE_BLOB 3 /* DRM_VIRTGPU_RESOURCE_CREATE_BLOB */ +#define VIRTGPU_PARAM_HOST_VISIBLE 4 /* Host blob resources are mappable */ +#define VIRTGPU_PARAM_CROSS_DEVICE 5 /* Cross virtio-device resource sharing \ + */ +#define VIRTGPU_PARAM_CONTEXT_INIT 6 /* DRM_VIRTGPU_CONTEXT_INIT */ +#define VIRTGPU_PARAM_SUPPORTED_CAPSET_IDs \ + 7 /* Bitmask of supported capability set ids */ +#define VIRTGPU_PARAM_EXPLICIT_DEBUG_NAME \ + 8 /* Ability to set debug name from userspace */ + +struct drm_virtgpu_getparam { + __u64 param; + __u64 value; +}; + +/* NO_BO flags? NO resource flag? */ +/* resource flag for y_0_top */ +struct drm_virtgpu_resource_create { + __u32 target; + __u32 format; + __u32 bind; + __u32 width; + __u32 height; + __u32 depth; + __u32 array_size; + __u32 last_level; + __u32 nr_samples; + __u32 flags; + __u32 bo_handle; /* if this is set - recreate a new resource attached to + this bo ? */ + __u32 res_handle; /* returned by kernel */ + __u32 size; /* validate transfer in the host */ + __u32 stride; /* validate transfer in the host */ +}; + +struct drm_virtgpu_resource_info { + __u32 bo_handle; + __u32 res_handle; + __u32 size; + __u32 blob_mem; +}; + +struct drm_virtgpu_3d_box { + __u32 x; + __u32 y; + __u32 z; + __u32 w; + __u32 h; + __u32 d; +}; + +struct drm_virtgpu_3d_transfer_to_host { + __u32 bo_handle; + struct drm_virtgpu_3d_box box; + __u32 level; + __u32 offset; + __u32 stride; + __u32 layer_stride; +}; + +struct drm_virtgpu_3d_transfer_from_host { + __u32 bo_handle; + struct drm_virtgpu_3d_box box; + __u32 level; + __u32 offset; + __u32 stride; + __u32 layer_stride; +}; + +#define VIRTGPU_WAIT_NOWAIT 1 /* like it */ +struct drm_virtgpu_3d_wait { + __u32 handle; /* 0 is an invalid handle */ + __u32 flags; +}; + +#define VIRTGPU_DRM_CAPSET_VIRGL 1 +#define VIRTGPU_DRM_CAPSET_VIRGL2 2 +#define VIRTGPU_DRM_CAPSET_GFXSTREAM_VULKAN 3 +#define VIRTGPU_DRM_CAPSET_VENUS 4 +#define VIRTGPU_DRM_CAPSET_CROSS_DOMAIN 5 +#define VIRTGPU_DRM_CAPSET_DRM 6 +struct drm_virtgpu_get_caps { + __u32 cap_set_id; + __u32 cap_set_ver; + __u64 addr; + __u32 size; + __u32 pad; +}; + +struct drm_virtgpu_resource_create_blob { +#define VIRTGPU_BLOB_MEM_GUEST 0x0001 +#define VIRTGPU_BLOB_MEM_HOST3D 0x0002 +#define VIRTGPU_BLOB_MEM_HOST3D_GUEST 0x0003 + +#define VIRTGPU_BLOB_FLAG_USE_MAPPABLE 0x0001 +#define VIRTGPU_BLOB_FLAG_USE_SHAREABLE 0x0002 +#define VIRTGPU_BLOB_FLAG_USE_CROSS_DEVICE 0x0004 + /* zero is invalid blob_mem */ + __u32 blob_mem; + __u32 blob_flags; + __u32 bo_handle; + __u32 res_handle; + __u64 size; + + /* + * for 3D contexts with VIRTGPU_BLOB_MEM_HOST3D_GUEST and + * VIRTGPU_BLOB_MEM_HOST3D otherwise, must be zero. + */ + __u32 pad; + __u32 cmd_size; + __u64 cmd; + __u64 blob_id; +}; + +#define VIRTGPU_CONTEXT_PARAM_CAPSET_ID 0x0001 +#define VIRTGPU_CONTEXT_PARAM_NUM_RINGS 0x0002 +#define VIRTGPU_CONTEXT_PARAM_POLL_RINGS_MASK 0x0003 +#define VIRTGPU_CONTEXT_PARAM_DEBUG_NAME 0x0004 +struct drm_virtgpu_context_set_param { + __u64 param; + __u64 value; +}; + +struct drm_virtgpu_context_init { + __u32 num_params; + __u32 pad; + + /* pointer to drm_virtgpu_context_set_param array */ + __u64 ctx_set_params; +}; + +/* + * Event code that's given when VIRTGPU_CONTEXT_PARAM_POLL_RINGS_MASK is in + * effect. The event size is sizeof(drm_event), since there is no additional + * payload. + */ +#define VIRTGPU_EVENT_FENCE_SIGNALED 0x90000000 + +#define DRM_IOCTL_VIRTGPU_MAP \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_MAP, struct drm_virtgpu_map) + +#define DRM_IOCTL_VIRTGPU_EXECBUFFER \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_EXECBUFFER, \ + struct drm_virtgpu_execbuffer) + +#define DRM_IOCTL_VIRTGPU_GETPARAM \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_GETPARAM, \ + struct drm_virtgpu_getparam) + +#define DRM_IOCTL_VIRTGPU_RESOURCE_CREATE \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_RESOURCE_CREATE, \ + struct drm_virtgpu_resource_create) + +#define DRM_IOCTL_VIRTGPU_RESOURCE_INFO \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_RESOURCE_INFO, \ + struct drm_virtgpu_resource_info) + +#define DRM_IOCTL_VIRTGPU_TRANSFER_FROM_HOST \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_TRANSFER_FROM_HOST, \ + struct drm_virtgpu_3d_transfer_from_host) + +#define DRM_IOCTL_VIRTGPU_TRANSFER_TO_HOST \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_TRANSFER_TO_HOST, \ + struct drm_virtgpu_3d_transfer_to_host) + +#define DRM_IOCTL_VIRTGPU_WAIT \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_WAIT, \ + struct drm_virtgpu_3d_wait) + +#define DRM_IOCTL_VIRTGPU_GET_CAPS \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_GET_CAPS, \ + struct drm_virtgpu_get_caps) + +#define DRM_IOCTL_VIRTGPU_RESOURCE_CREATE_BLOB \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_RESOURCE_CREATE_BLOB, \ + struct drm_virtgpu_resource_create_blob) + +#define DRM_IOCTL_VIRTGPU_CONTEXT_INIT \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_CONTEXT_INIT, \ + struct drm_virtgpu_context_init) + +#if defined(__cplusplus) +} +#endif + +#endif diff --git a/include/arch/x86_64/drm/vmwgfx_drm.h b/include/arch/x86_64/drm/vmwgfx_drm.h new file mode 100644 index 00000000..1103ccfd --- /dev/null +++ b/include/arch/x86_64/drm/vmwgfx_drm.h @@ -0,0 +1,1281 @@ +/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR MIT */ +/************************************************************************** + * + * Copyright © 2009-2023 VMware, Inc., Palo Alto, CA., USA + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef __VMWGFX_DRM_H__ +#define __VMWGFX_DRM_H__ + +#include "drm.h" + +#if defined(__cplusplus) +extern "C" { +#endif + +#define DRM_VMW_MAX_SURFACE_FACES 6 +#define DRM_VMW_MAX_MIP_LEVELS 24 + +#define DRM_VMW_GET_PARAM 0 +#define DRM_VMW_ALLOC_DMABUF 1 +#define DRM_VMW_ALLOC_BO 1 +#define DRM_VMW_UNREF_DMABUF 2 +#define DRM_VMW_HANDLE_CLOSE 2 +#define DRM_VMW_CURSOR_BYPASS 3 +/* guarded by DRM_VMW_PARAM_NUM_STREAMS != 0*/ +#define DRM_VMW_CONTROL_STREAM 4 +#define DRM_VMW_CLAIM_STREAM 5 +#define DRM_VMW_UNREF_STREAM 6 +/* guarded by DRM_VMW_PARAM_3D == 1 */ +#define DRM_VMW_CREATE_CONTEXT 7 +#define DRM_VMW_UNREF_CONTEXT 8 +#define DRM_VMW_CREATE_SURFACE 9 +#define DRM_VMW_UNREF_SURFACE 10 +#define DRM_VMW_REF_SURFACE 11 +#define DRM_VMW_EXECBUF 12 +#define DRM_VMW_GET_3D_CAP 13 +#define DRM_VMW_FENCE_WAIT 14 +#define DRM_VMW_FENCE_SIGNALED 15 +#define DRM_VMW_FENCE_UNREF 16 +#define DRM_VMW_FENCE_EVENT 17 +#define DRM_VMW_PRESENT 18 +#define DRM_VMW_PRESENT_READBACK 19 +#define DRM_VMW_UPDATE_LAYOUT 20 +#define DRM_VMW_CREATE_SHADER 21 +#define DRM_VMW_UNREF_SHADER 22 +#define DRM_VMW_GB_SURFACE_CREATE 23 +#define DRM_VMW_GB_SURFACE_REF 24 +#define DRM_VMW_SYNCCPU 25 +#define DRM_VMW_CREATE_EXTENDED_CONTEXT 26 +#define DRM_VMW_GB_SURFACE_CREATE_EXT 27 +#define DRM_VMW_GB_SURFACE_REF_EXT 28 +#define DRM_VMW_MSG 29 +#define DRM_VMW_MKSSTAT_RESET 30 +#define DRM_VMW_MKSSTAT_ADD 31 +#define DRM_VMW_MKSSTAT_REMOVE 32 + +/*************************************************************************/ +/** + * DRM_VMW_GET_PARAM - get device information. + * + * DRM_VMW_PARAM_FIFO_OFFSET: + * Offset to use to map the first page of the FIFO read-only. + * The fifo is mapped using the mmap() system call on the drm device. + * + * DRM_VMW_PARAM_OVERLAY_IOCTL: + * Does the driver support the overlay ioctl. + * + * DRM_VMW_PARAM_SM4_1 + * SM4_1 support is enabled. + * + * DRM_VMW_PARAM_SM5 + * SM5 support is enabled. + * + * DRM_VMW_PARAM_GL43 + * SM5.1+GL4.3 support is enabled. + * + * DRM_VMW_PARAM_DEVICE_ID + * PCI ID of the underlying SVGA device. + */ + +#define DRM_VMW_PARAM_NUM_STREAMS 0 +#define DRM_VMW_PARAM_NUM_FREE_STREAMS 1 +#define DRM_VMW_PARAM_3D 2 +#define DRM_VMW_PARAM_HW_CAPS 3 +#define DRM_VMW_PARAM_FIFO_CAPS 4 +#define DRM_VMW_PARAM_MAX_FB_SIZE 5 +#define DRM_VMW_PARAM_FIFO_HW_VERSION 6 +#define DRM_VMW_PARAM_MAX_SURF_MEMORY 7 +#define DRM_VMW_PARAM_3D_CAPS_SIZE 8 +#define DRM_VMW_PARAM_MAX_MOB_MEMORY 9 +#define DRM_VMW_PARAM_MAX_MOB_SIZE 10 +#define DRM_VMW_PARAM_SCREEN_TARGET 11 +#define DRM_VMW_PARAM_DX 12 +#define DRM_VMW_PARAM_HW_CAPS2 13 +#define DRM_VMW_PARAM_SM4_1 14 +#define DRM_VMW_PARAM_SM5 15 +#define DRM_VMW_PARAM_GL43 16 +#define DRM_VMW_PARAM_DEVICE_ID 17 + +/** + * enum drm_vmw_handle_type - handle type for ref ioctls + * + */ +enum drm_vmw_handle_type { + DRM_VMW_HANDLE_LEGACY = 0, + DRM_VMW_HANDLE_PRIME = 1 +}; + +/** + * struct drm_vmw_getparam_arg + * + * @value: Returned value. //Out + * @param: Parameter to query. //In. + * + * Argument to the DRM_VMW_GET_PARAM Ioctl. + */ + +struct drm_vmw_getparam_arg { + __u64 value; + __u32 param; + __u32 pad64; +}; + +/*************************************************************************/ +/** + * DRM_VMW_CREATE_CONTEXT - Create a host context. + * + * Allocates a device unique context id, and queues a create context command + * for the host. Does not wait for host completion. + */ + +/** + * struct drm_vmw_context_arg + * + * @cid: Device unique context ID. + * + * Output argument to the DRM_VMW_CREATE_CONTEXT Ioctl. + * Input argument to the DRM_VMW_UNREF_CONTEXT Ioctl. + */ + +struct drm_vmw_context_arg { + __s32 cid; + __u32 pad64; +}; + +/*************************************************************************/ +/** + * DRM_VMW_UNREF_CONTEXT - Create a host context. + * + * Frees a global context id, and queues a destroy host command for the host. + * Does not wait for host completion. The context ID can be used directly + * in the command stream and shows up as the same context ID on the host. + */ + +/*************************************************************************/ +/** + * DRM_VMW_CREATE_SURFACE - Create a host suface. + * + * Allocates a device unique surface id, and queues a create surface command + * for the host. Does not wait for host completion. The surface ID can be + * used directly in the command stream and shows up as the same surface + * ID on the host. + */ + +/** + * struct drm_wmv_surface_create_req + * + * @flags: Surface flags as understood by the host. + * @format: Surface format as understood by the host. + * @mip_levels: Number of mip levels for each face. + * An unused face should have 0 encoded. + * @size_addr: Address of a user-space array of sruct drm_vmw_size + * cast to an __u64 for 32-64 bit compatibility. + * The size of the array should equal the total number of mipmap levels. + * @shareable: Boolean whether other clients (as identified by file descriptors) + * may reference this surface. + * @scanout: Boolean whether the surface is intended to be used as a + * scanout. + * + * Input data to the DRM_VMW_CREATE_SURFACE Ioctl. + * Output data from the DRM_VMW_REF_SURFACE Ioctl. + */ + +struct drm_vmw_surface_create_req { + __u32 flags; + __u32 format; + __u32 mip_levels[DRM_VMW_MAX_SURFACE_FACES]; + __u64 size_addr; + __s32 shareable; + __s32 scanout; +}; + +/** + * struct drm_wmv_surface_arg + * + * @sid: Surface id of created surface or surface to destroy or reference. + * @handle_type: Handle type for DRM_VMW_REF_SURFACE Ioctl. + * + * Output data from the DRM_VMW_CREATE_SURFACE Ioctl. + * Input argument to the DRM_VMW_UNREF_SURFACE Ioctl. + * Input argument to the DRM_VMW_REF_SURFACE Ioctl. + */ + +struct drm_vmw_surface_arg { + __s32 sid; + enum drm_vmw_handle_type handle_type; +}; + +/** + * struct drm_vmw_size ioctl. + * + * @width - mip level width + * @height - mip level height + * @depth - mip level depth + * + * Description of a mip level. + * Input data to the DRM_WMW_CREATE_SURFACE Ioctl. + */ + +struct drm_vmw_size { + __u32 width; + __u32 height; + __u32 depth; + __u32 pad64; +}; + +/** + * union drm_vmw_surface_create_arg + * + * @rep: Output data as described above. + * @req: Input data as described above. + * + * Argument to the DRM_VMW_CREATE_SURFACE Ioctl. + */ + +union drm_vmw_surface_create_arg { + struct drm_vmw_surface_arg rep; + struct drm_vmw_surface_create_req req; +}; + +/*************************************************************************/ +/** + * DRM_VMW_REF_SURFACE - Reference a host surface. + * + * Puts a reference on a host surface with a give sid, as previously + * returned by the DRM_VMW_CREATE_SURFACE ioctl. + * A reference will make sure the surface isn't destroyed while we hold + * it and will allow the calling client to use the surface ID in the command + * stream. + * + * On successful return, the Ioctl returns the surface information given + * in the DRM_VMW_CREATE_SURFACE ioctl. + */ + +/** + * union drm_vmw_surface_reference_arg + * + * @rep: Output data as described above. + * @req: Input data as described above. + * + * Argument to the DRM_VMW_REF_SURFACE Ioctl. + */ + +union drm_vmw_surface_reference_arg { + struct drm_vmw_surface_create_req rep; + struct drm_vmw_surface_arg req; +}; + +/*************************************************************************/ +/** + * DRM_VMW_UNREF_SURFACE - Unreference a host surface. + * + * Clear a reference previously put on a host surface. + * When all references are gone, including the one implicitly placed + * on creation, + * a destroy surface command will be queued for the host. + * Does not wait for completion. + */ + +/*************************************************************************/ +/** + * DRM_VMW_EXECBUF + * + * Submit a command buffer for execution on the host, and return a + * fence seqno that when signaled, indicates that the command buffer has + * executed. + */ + +/** + * struct drm_vmw_execbuf_arg + * + * @commands: User-space address of a command buffer cast to an __u64. + * @command-size: Size in bytes of the command buffer. + * @throttle-us: Sleep until software is less than @throttle_us + * microseconds ahead of hardware. The driver may round this value + * to the nearest kernel tick. + * @fence_rep: User-space address of a struct drm_vmw_fence_rep cast to an + * __u64. + * @version: Allows expanding the execbuf ioctl parameters without breaking + * backwards compatibility, since user-space will always tell the kernel + * which version it uses. + * @flags: Execbuf flags. + * @imported_fence_fd: FD for a fence imported from another device + * + * Argument to the DRM_VMW_EXECBUF Ioctl. + */ + +#define DRM_VMW_EXECBUF_VERSION 2 + +#define DRM_VMW_EXECBUF_FLAG_IMPORT_FENCE_FD (1 << 0) +#define DRM_VMW_EXECBUF_FLAG_EXPORT_FENCE_FD (1 << 1) + +struct drm_vmw_execbuf_arg { + __u64 commands; + __u32 command_size; + __u32 throttle_us; + __u64 fence_rep; + __u32 version; + __u32 flags; + __u32 context_handle; + __s32 imported_fence_fd; +}; + +/** + * struct drm_vmw_fence_rep + * + * @handle: Fence object handle for fence associated with a command submission. + * @mask: Fence flags relevant for this fence object. + * @seqno: Fence sequence number in fifo. A fence object with a lower + * seqno will signal the EXEC flag before a fence object with a higher + * seqno. This can be used by user-space to avoid kernel calls to determine + * whether a fence has signaled the EXEC flag. Note that @seqno will + * wrap at 32-bit. + * @passed_seqno: The highest seqno number processed by the hardware + * so far. This can be used to mark user-space fence objects as signaled, and + * to determine whether a fence seqno might be stale. + * @fd: FD associated with the fence, -1 if not exported + * @error: This member should've been set to -EFAULT on submission. + * The following actions should be take on completion: + * error == -EFAULT: Fence communication failed. The host is synchronized. + * Use the last fence id read from the FIFO fence register. + * error != 0 && error != -EFAULT: + * Fence submission failed. The host is synchronized. Use the fence_seq member. + * error == 0: All is OK, The host may not be synchronized. + * Use the fence_seq member. + * + * Input / Output data to the DRM_VMW_EXECBUF Ioctl. + */ + +struct drm_vmw_fence_rep { + __u32 handle; + __u32 mask; + __u32 seqno; + __u32 passed_seqno; + __s32 fd; + __s32 error; +}; + +/*************************************************************************/ +/** + * DRM_VMW_ALLOC_BO + * + * Allocate a buffer object that is visible also to the host. + * NOTE: The buffer is + * identified by a handle and an offset, which are private to the guest, but + * useable in the command stream. The guest kernel may translate these + * and patch up the command stream accordingly. In the future, the offset may + * be zero at all times, or it may disappear from the interface before it is + * fixed. + * + * The buffer object may stay user-space mapped in the guest at all times, + * and is thus suitable for sub-allocation. + * + * Buffer objects are mapped using the mmap() syscall on the drm device. + */ + +/** + * struct drm_vmw_alloc_bo_req + * + * @size: Required minimum size of the buffer. + * + * Input data to the DRM_VMW_ALLOC_BO Ioctl. + */ + +struct drm_vmw_alloc_bo_req { + __u32 size; + __u32 pad64; +}; +#define drm_vmw_alloc_dmabuf_req drm_vmw_alloc_bo_req + +/** + * struct drm_vmw_bo_rep + * + * @map_handle: Offset to use in the mmap() call used to map the buffer. + * @handle: Handle unique to this buffer. Used for unreferencing. + * @cur_gmr_id: GMR id to use in the command stream when this buffer is + * referenced. See not above. + * @cur_gmr_offset: Offset to use in the command stream when this buffer is + * referenced. See note above. + * + * Output data from the DRM_VMW_ALLOC_BO Ioctl. + */ + +struct drm_vmw_bo_rep { + __u64 map_handle; + __u32 handle; + __u32 cur_gmr_id; + __u32 cur_gmr_offset; + __u32 pad64; +}; +#define drm_vmw_dmabuf_rep drm_vmw_bo_rep + +/** + * union drm_vmw_alloc_bo_arg + * + * @req: Input data as described above. + * @rep: Output data as described above. + * + * Argument to the DRM_VMW_ALLOC_BO Ioctl. + */ + +union drm_vmw_alloc_bo_arg { + struct drm_vmw_alloc_bo_req req; + struct drm_vmw_bo_rep rep; +}; +#define drm_vmw_alloc_dmabuf_arg drm_vmw_alloc_bo_arg + +/*************************************************************************/ +/** + * DRM_VMW_CONTROL_STREAM - Control overlays, aka streams. + * + * This IOCTL controls the overlay units of the svga device. + * The SVGA overlay units does not work like regular hardware units in + * that they do not automaticaly read back the contents of the given dma + * buffer. But instead only read back for each call to this ioctl, and + * at any point between this call being made and a following call that + * either changes the buffer or disables the stream. + */ + +/** + * struct drm_vmw_rect + * + * Defines a rectangle. Used in the overlay ioctl to define + * source and destination rectangle. + */ + +struct drm_vmw_rect { + __s32 x; + __s32 y; + __u32 w; + __u32 h; +}; + +/** + * struct drm_vmw_control_stream_arg + * + * @stream_id: Stearm to control + * @enabled: If false all following arguments are ignored. + * @handle: Handle to buffer for getting data from. + * @format: Format of the overlay as understood by the host. + * @width: Width of the overlay. + * @height: Height of the overlay. + * @size: Size of the overlay in bytes. + * @pitch: Array of pitches, the two last are only used for YUV12 formats. + * @offset: Offset from start of dma buffer to overlay. + * @src: Source rect, must be within the defined area above. + * @dst: Destination rect, x and y may be negative. + * + * Argument to the DRM_VMW_CONTROL_STREAM Ioctl. + */ + +struct drm_vmw_control_stream_arg { + __u32 stream_id; + __u32 enabled; + + __u32 flags; + __u32 color_key; + + __u32 handle; + __u32 offset; + __s32 format; + __u32 size; + __u32 width; + __u32 height; + __u32 pitch[3]; + + __u32 pad64; + struct drm_vmw_rect src; + struct drm_vmw_rect dst; +}; + +/*************************************************************************/ +/** + * DRM_VMW_CURSOR_BYPASS - Give extra information about cursor bypass. + * + */ + +#define DRM_VMW_CURSOR_BYPASS_ALL (1 << 0) +#define DRM_VMW_CURSOR_BYPASS_FLAGS (1) + +/** + * struct drm_vmw_cursor_bypass_arg + * + * @flags: Flags. + * @crtc_id: Crtc id, only used if DMR_CURSOR_BYPASS_ALL isn't passed. + * @xpos: X position of cursor. + * @ypos: Y position of cursor. + * @xhot: X hotspot. + * @yhot: Y hotspot. + * + * Argument to the DRM_VMW_CURSOR_BYPASS Ioctl. + */ + +struct drm_vmw_cursor_bypass_arg { + __u32 flags; + __u32 crtc_id; + __s32 xpos; + __s32 ypos; + __s32 xhot; + __s32 yhot; +}; + +/*************************************************************************/ +/** + * DRM_VMW_CLAIM_STREAM - Claim a single stream. + */ + +/** + * struct drm_vmw_context_arg + * + * @stream_id: Device unique context ID. + * + * Output argument to the DRM_VMW_CREATE_CONTEXT Ioctl. + * Input argument to the DRM_VMW_UNREF_CONTEXT Ioctl. + */ + +struct drm_vmw_stream_arg { + __u32 stream_id; + __u32 pad64; +}; + +/*************************************************************************/ +/** + * DRM_VMW_UNREF_STREAM - Unclaim a stream. + * + * Return a single stream that was claimed by this process. Also makes + * sure that the stream has been stopped. + */ + +/*************************************************************************/ +/** + * DRM_VMW_GET_3D_CAP + * + * Read 3D capabilities from the FIFO + * + */ + +/** + * struct drm_vmw_get_3d_cap_arg + * + * @buffer: Pointer to a buffer for capability data, cast to an __u64 + * @size: Max size to copy + * + * Input argument to the DRM_VMW_GET_3D_CAP_IOCTL + * ioctls. + */ + +struct drm_vmw_get_3d_cap_arg { + __u64 buffer; + __u32 max_size; + __u32 pad64; +}; + +/*************************************************************************/ +/** + * DRM_VMW_FENCE_WAIT + * + * Waits for a fence object to signal. The wait is interruptible, so that + * signals may be delivered during the interrupt. The wait may timeout, + * in which case the calls returns -EBUSY. If the wait is restarted, + * that is restarting without resetting @cookie_valid to zero, + * the timeout is computed from the first call. + * + * The flags argument to the DRM_VMW_FENCE_WAIT ioctl indicates what to wait + * on: + * DRM_VMW_FENCE_FLAG_EXEC: All commands ahead of the fence in the command + * stream + * have executed. + * DRM_VMW_FENCE_FLAG_QUERY: All query results resulting from query finish + * commands + * in the buffer given to the EXECBUF ioctl returning the fence object handle + * are available to user-space. + * + * DRM_VMW_WAIT_OPTION_UNREF: If this wait option is given, and the + * fenc wait ioctl returns 0, the fence object has been unreferenced after + * the wait. + */ + +#define DRM_VMW_FENCE_FLAG_EXEC (1 << 0) +#define DRM_VMW_FENCE_FLAG_QUERY (1 << 1) + +#define DRM_VMW_WAIT_OPTION_UNREF (1 << 0) + +/** + * struct drm_vmw_fence_wait_arg + * + * @handle: Fence object handle as returned by the DRM_VMW_EXECBUF ioctl. + * @cookie_valid: Must be reset to 0 on first call. Left alone on restart. + * @kernel_cookie: Set to 0 on first call. Left alone on restart. + * @timeout_us: Wait timeout in microseconds. 0 for indefinite timeout. + * @lazy: Set to 1 if timing is not critical. Allow more than a kernel tick + * before returning. + * @flags: Fence flags to wait on. + * @wait_options: Options that control the behaviour of the wait ioctl. + * + * Input argument to the DRM_VMW_FENCE_WAIT ioctl. + */ + +struct drm_vmw_fence_wait_arg { + __u32 handle; + __s32 cookie_valid; + __u64 kernel_cookie; + __u64 timeout_us; + __s32 lazy; + __s32 flags; + __s32 wait_options; + __s32 pad64; +}; + +/*************************************************************************/ +/** + * DRM_VMW_FENCE_SIGNALED + * + * Checks if a fence object is signaled.. + */ + +/** + * struct drm_vmw_fence_signaled_arg + * + * @handle: Fence object handle as returned by the DRM_VMW_EXECBUF ioctl. + * @flags: Fence object flags input to DRM_VMW_FENCE_SIGNALED ioctl + * @signaled: Out: Flags signaled. + * @sequence: Out: Highest sequence passed so far. Can be used to signal the + * EXEC flag of user-space fence objects. + * + * Input/Output argument to the DRM_VMW_FENCE_SIGNALED and DRM_VMW_FENCE_UNREF + * ioctls. + */ + +struct drm_vmw_fence_signaled_arg { + __u32 handle; + __u32 flags; + __s32 signaled; + __u32 passed_seqno; + __u32 signaled_flags; + __u32 pad64; +}; + +/*************************************************************************/ +/** + * DRM_VMW_FENCE_UNREF + * + * Unreferences a fence object, and causes it to be destroyed if there are no + * other references to it. + * + */ + +/** + * struct drm_vmw_fence_arg + * + * @handle: Fence object handle as returned by the DRM_VMW_EXECBUF ioctl. + * + * Input/Output argument to the DRM_VMW_FENCE_UNREF ioctl.. + */ + +struct drm_vmw_fence_arg { + __u32 handle; + __u32 pad64; +}; + +/*************************************************************************/ +/** + * DRM_VMW_FENCE_EVENT + * + * Queues an event on a fence to be delivered on the drm character device + * when the fence has signaled the DRM_VMW_FENCE_FLAG_EXEC flag. + * Optionally the approximate time when the fence signaled is + * given by the event. + */ + +/* + * The event type + */ +#define DRM_VMW_EVENT_FENCE_SIGNALED 0x80000000 + +struct drm_vmw_event_fence { + struct drm_event base; + __u64 user_data; + __u32 tv_sec; + __u32 tv_usec; +}; + +/* + * Flags that may be given to the command. + */ +/* Request fence signaled time on the event. */ +#define DRM_VMW_FE_FLAG_REQ_TIME (1 << 0) + +/** + * struct drm_vmw_fence_event_arg + * + * @fence_rep: Pointer to fence_rep structure cast to __u64 or 0 if + * the fence is not supposed to be referenced by user-space. + * @user_info: Info to be delivered with the event. + * @handle: Attach the event to this fence only. + * @flags: A set of flags as defined above. + */ +struct drm_vmw_fence_event_arg { + __u64 fence_rep; + __u64 user_data; + __u32 handle; + __u32 flags; +}; + +/*************************************************************************/ +/** + * DRM_VMW_PRESENT + * + * Executes an SVGA present on a given fb for a given surface. The surface + * is placed on the framebuffer. Cliprects are given relative to the given + * point (the point disignated by dest_{x|y}). + * + */ + +/** + * struct drm_vmw_present_arg + * @fb_id: framebuffer id to present / read back from. + * @sid: Surface id to present from. + * @dest_x: X placement coordinate for surface. + * @dest_y: Y placement coordinate for surface. + * @clips_ptr: Pointer to an array of clip rects cast to an __u64. + * @num_clips: Number of cliprects given relative to the framebuffer origin, + * in the same coordinate space as the frame buffer. + * @pad64: Unused 64-bit padding. + * + * Input argument to the DRM_VMW_PRESENT ioctl. + */ + +struct drm_vmw_present_arg { + __u32 fb_id; + __u32 sid; + __s32 dest_x; + __s32 dest_y; + __u64 clips_ptr; + __u32 num_clips; + __u32 pad64; +}; + +/*************************************************************************/ +/** + * DRM_VMW_PRESENT_READBACK + * + * Executes an SVGA present readback from a given fb to the dma buffer + * currently bound as the fb. If there is no dma buffer bound to the fb, + * an error will be returned. + * + */ + +/** + * struct drm_vmw_present_arg + * @fb_id: fb_id to present / read back from. + * @num_clips: Number of cliprects. + * @clips_ptr: Pointer to an array of clip rects cast to an __u64. + * @fence_rep: Pointer to a struct drm_vmw_fence_rep, cast to an __u64. + * If this member is NULL, then the ioctl should not return a fence. + */ + +struct drm_vmw_present_readback_arg { + __u32 fb_id; + __u32 num_clips; + __u64 clips_ptr; + __u64 fence_rep; +}; + +/*************************************************************************/ +/** + * DRM_VMW_UPDATE_LAYOUT - Update layout + * + * Updates the preferred modes and connection status for connectors. The + * command consists of one drm_vmw_update_layout_arg pointing to an array + * of num_outputs drm_vmw_rect's. + */ + +/** + * struct drm_vmw_update_layout_arg + * + * @num_outputs: number of active connectors + * @rects: pointer to array of drm_vmw_rect cast to an __u64 + * + * Input argument to the DRM_VMW_UPDATE_LAYOUT Ioctl. + */ +struct drm_vmw_update_layout_arg { + __u32 num_outputs; + __u32 pad64; + __u64 rects; +}; + +/*************************************************************************/ +/** + * DRM_VMW_CREATE_SHADER - Create shader + * + * Creates a shader and optionally binds it to a dma buffer containing + * the shader byte-code. + */ + +/** + * enum drm_vmw_shader_type - Shader types + */ +enum drm_vmw_shader_type { + drm_vmw_shader_type_vs = 0, + drm_vmw_shader_type_ps, +}; + +/** + * struct drm_vmw_shader_create_arg + * + * @shader_type: Shader type of the shader to create. + * @size: Size of the byte-code in bytes. + * where the shader byte-code starts + * @buffer_handle: Buffer handle identifying the buffer containing the + * shader byte-code + * @shader_handle: On successful completion contains a handle that + * can be used to subsequently identify the shader. + * @offset: Offset in bytes into the buffer given by @buffer_handle, + * + * Input / Output argument to the DRM_VMW_CREATE_SHADER Ioctl. + */ +struct drm_vmw_shader_create_arg { + enum drm_vmw_shader_type shader_type; + __u32 size; + __u32 buffer_handle; + __u32 shader_handle; + __u64 offset; +}; + +/*************************************************************************/ +/** + * DRM_VMW_UNREF_SHADER - Unreferences a shader + * + * Destroys a user-space reference to a shader, optionally destroying + * it. + */ + +/** + * struct drm_vmw_shader_arg + * + * @handle: Handle identifying the shader to destroy. + * + * Input argument to the DRM_VMW_UNREF_SHADER ioctl. + */ +struct drm_vmw_shader_arg { + __u32 handle; + __u32 pad64; +}; + +/*************************************************************************/ +/** + * DRM_VMW_GB_SURFACE_CREATE - Create a host guest-backed surface. + * + * Allocates a surface handle and queues a create surface command + * for the host on the first use of the surface. The surface ID can + * be used as the surface ID in commands referencing the surface. + */ + +/** + * enum drm_vmw_surface_flags + * + * @drm_vmw_surface_flag_shareable: Deprecated - all userspace surfaces are + * shareable. + * @drm_vmw_surface_flag_scanout: Whether the surface is a scanout + * surface. + * @drm_vmw_surface_flag_create_buffer: Create a backup buffer if none is + * given. + * @drm_vmw_surface_flag_coherent: Back surface with coherent memory. + */ +enum drm_vmw_surface_flags { + drm_vmw_surface_flag_shareable = (1 << 0), + drm_vmw_surface_flag_scanout = (1 << 1), + drm_vmw_surface_flag_create_buffer = (1 << 2), + drm_vmw_surface_flag_coherent = (1 << 3), +}; + +/** + * struct drm_vmw_gb_surface_create_req + * + * @svga3d_flags: SVGA3d surface flags for the device. + * @format: SVGA3d format. + * @mip_level: Number of mip levels for all faces. + * @drm_surface_flags Flags as described above. + * @multisample_count Future use. Set to 0. + * @autogen_filter Future use. Set to 0. + * @buffer_handle Buffer handle of backup buffer. SVGA3D_INVALID_ID + * if none. + * @base_size Size of the base mip level for all faces. + * @array_size Must be zero for non-DX hardware, and if non-zero + * svga3d_flags must have proper bind flags setup. + * + * Input argument to the DRM_VMW_GB_SURFACE_CREATE Ioctl. + * Part of output argument for the DRM_VMW_GB_SURFACE_REF Ioctl. + */ +struct drm_vmw_gb_surface_create_req { + __u32 svga3d_flags; + __u32 format; + __u32 mip_levels; + enum drm_vmw_surface_flags drm_surface_flags; + __u32 multisample_count; + __u32 autogen_filter; + __u32 buffer_handle; + __u32 array_size; + struct drm_vmw_size base_size; +}; + +/** + * struct drm_vmw_gb_surface_create_rep + * + * @handle: Surface handle. + * @backup_size: Size of backup buffers for this surface. + * @buffer_handle: Handle of backup buffer. SVGA3D_INVALID_ID if none. + * @buffer_size: Actual size of the buffer identified by + * @buffer_handle + * @buffer_map_handle: Offset into device address space for the buffer + * identified by @buffer_handle. + * + * Part of output argument for the DRM_VMW_GB_SURFACE_REF ioctl. + * Output argument for the DRM_VMW_GB_SURFACE_CREATE ioctl. + */ +struct drm_vmw_gb_surface_create_rep { + __u32 handle; + __u32 backup_size; + __u32 buffer_handle; + __u32 buffer_size; + __u64 buffer_map_handle; +}; + +/** + * union drm_vmw_gb_surface_create_arg + * + * @req: Input argument as described above. + * @rep: Output argument as described above. + * + * Argument to the DRM_VMW_GB_SURFACE_CREATE ioctl. + */ +union drm_vmw_gb_surface_create_arg { + struct drm_vmw_gb_surface_create_rep rep; + struct drm_vmw_gb_surface_create_req req; +}; + +/*************************************************************************/ +/** + * DRM_VMW_GB_SURFACE_REF - Reference a host surface. + * + * Puts a reference on a host surface with a given handle, as previously + * returned by the DRM_VMW_GB_SURFACE_CREATE ioctl. + * A reference will make sure the surface isn't destroyed while we hold + * it and will allow the calling client to use the surface handle in + * the command stream. + * + * On successful return, the Ioctl returns the surface information given + * to and returned from the DRM_VMW_GB_SURFACE_CREATE ioctl. + */ + +/** + * struct drm_vmw_gb_surface_reference_arg + * + * @creq: The data used as input when the surface was created, as described + * above at "struct drm_vmw_gb_surface_create_req" + * @crep: Additional data output when the surface was created, as described + * above at "struct drm_vmw_gb_surface_create_rep" + * + * Output Argument to the DRM_VMW_GB_SURFACE_REF ioctl. + */ +struct drm_vmw_gb_surface_ref_rep { + struct drm_vmw_gb_surface_create_req creq; + struct drm_vmw_gb_surface_create_rep crep; +}; + +/** + * union drm_vmw_gb_surface_reference_arg + * + * @req: Input data as described above at "struct drm_vmw_surface_arg" + * @rep: Output data as described above at "struct drm_vmw_gb_surface_ref_rep" + * + * Argument to the DRM_VMW_GB_SURFACE_REF Ioctl. + */ +union drm_vmw_gb_surface_reference_arg { + struct drm_vmw_gb_surface_ref_rep rep; + struct drm_vmw_surface_arg req; +}; + +/*************************************************************************/ +/** + * DRM_VMW_SYNCCPU - Sync a DMA buffer / MOB for CPU access. + * + * Idles any previously submitted GPU operations on the buffer and + * by default blocks command submissions that reference the buffer. + * If the file descriptor used to grab a blocking CPU sync is closed, the + * cpu sync is released. + * The flags argument indicates how the grab / release operation should be + * performed: + */ + +/** + * enum drm_vmw_synccpu_flags - Synccpu flags: + * + * @drm_vmw_synccpu_read: Sync for read. If sync is done for read only, it's a + * hint to the kernel to allow command submissions that references the buffer + * for read-only. + * @drm_vmw_synccpu_write: Sync for write. Block all command submissions + * referencing this buffer. + * @drm_vmw_synccpu_dontblock: Dont wait for GPU idle, but rather return + * -EBUSY should the buffer be busy. + * @drm_vmw_synccpu_allow_cs: Allow command submission that touches the buffer + * while the buffer is synced for CPU. This is similar to the GEM bo idle + * behavior. + */ +enum drm_vmw_synccpu_flags { + drm_vmw_synccpu_read = (1 << 0), + drm_vmw_synccpu_write = (1 << 1), + drm_vmw_synccpu_dontblock = (1 << 2), + drm_vmw_synccpu_allow_cs = (1 << 3) +}; + +/** + * enum drm_vmw_synccpu_op - Synccpu operations: + * + * @drm_vmw_synccpu_grab: Grab the buffer for CPU operations + * @drm_vmw_synccpu_release: Release a previous grab. + */ +enum drm_vmw_synccpu_op { drm_vmw_synccpu_grab, drm_vmw_synccpu_release }; + +/** + * struct drm_vmw_synccpu_arg + * + * @op: The synccpu operation as described above. + * @handle: Handle identifying the buffer object. + * @flags: Flags as described above. + */ +struct drm_vmw_synccpu_arg { + enum drm_vmw_synccpu_op op; + enum drm_vmw_synccpu_flags flags; + __u32 handle; + __u32 pad64; +}; + +/*************************************************************************/ +/** + * DRM_VMW_CREATE_EXTENDED_CONTEXT - Create a host context. + * + * Allocates a device unique context id, and queues a create context command + * for the host. Does not wait for host completion. + */ +enum drm_vmw_extended_context { drm_vmw_context_legacy, drm_vmw_context_dx }; + +/** + * union drm_vmw_extended_context_arg + * + * @req: Context type. + * @rep: Context identifier. + * + * Argument to the DRM_VMW_CREATE_EXTENDED_CONTEXT Ioctl. + */ +union drm_vmw_extended_context_arg { + enum drm_vmw_extended_context req; + struct drm_vmw_context_arg rep; +}; + +/*************************************************************************/ +/* + * DRM_VMW_HANDLE_CLOSE - Close a user-space handle and release its + * underlying resource. + * + * Note that this ioctl is overlaid on the deprecated DRM_VMW_UNREF_DMABUF + * Ioctl. + */ + +/** + * struct drm_vmw_handle_close_arg + * + * @handle: Handle to close. + * + * Argument to the DRM_VMW_HANDLE_CLOSE Ioctl. + */ +struct drm_vmw_handle_close_arg { + __u32 handle; + __u32 pad64; +}; +#define drm_vmw_unref_dmabuf_arg drm_vmw_handle_close_arg + +/*************************************************************************/ +/** + * DRM_VMW_GB_SURFACE_CREATE_EXT - Create a host guest-backed surface. + * + * Allocates a surface handle and queues a create surface command + * for the host on the first use of the surface. The surface ID can + * be used as the surface ID in commands referencing the surface. + * + * This new command extends DRM_VMW_GB_SURFACE_CREATE by adding version + * parameter and 64 bit svga flag. + */ + +/** + * enum drm_vmw_surface_version + * + * @drm_vmw_surface_gb_v1: Corresponds to current gb surface format with + * svga3d surface flags split into 2, upper half and lower half. + */ +enum drm_vmw_surface_version { + drm_vmw_gb_surface_v1, +}; + +/** + * struct drm_vmw_gb_surface_create_ext_req + * + * @base: Surface create parameters. + * @version: Version of surface create ioctl. + * @svga3d_flags_upper_32_bits: Upper 32 bits of svga3d flags. + * @multisample_pattern: Multisampling pattern when msaa is supported. + * @quality_level: Precision settings for each sample. + * @buffer_byte_stride: Buffer byte stride. + * @must_be_zero: Reserved for future usage. + * + * Input argument to the DRM_VMW_GB_SURFACE_CREATE_EXT Ioctl. + * Part of output argument for the DRM_VMW_GB_SURFACE_REF_EXT Ioctl. + */ +struct drm_vmw_gb_surface_create_ext_req { + struct drm_vmw_gb_surface_create_req base; + enum drm_vmw_surface_version version; + __u32 svga3d_flags_upper_32_bits; + __u32 multisample_pattern; + __u32 quality_level; + __u32 buffer_byte_stride; + __u32 must_be_zero; +}; + +/** + * union drm_vmw_gb_surface_create_ext_arg + * + * @req: Input argument as described above. + * @rep: Output argument as described above. + * + * Argument to the DRM_VMW_GB_SURFACE_CREATE_EXT ioctl. + */ +union drm_vmw_gb_surface_create_ext_arg { + struct drm_vmw_gb_surface_create_rep rep; + struct drm_vmw_gb_surface_create_ext_req req; +}; + +/*************************************************************************/ +/** + * DRM_VMW_GB_SURFACE_REF_EXT - Reference a host surface. + * + * Puts a reference on a host surface with a given handle, as previously + * returned by the DRM_VMW_GB_SURFACE_CREATE_EXT ioctl. + * A reference will make sure the surface isn't destroyed while we hold + * it and will allow the calling client to use the surface handle in + * the command stream. + * + * On successful return, the Ioctl returns the surface information given + * to and returned from the DRM_VMW_GB_SURFACE_CREATE_EXT ioctl. + */ + +/** + * struct drm_vmw_gb_surface_ref_ext_rep + * + * @creq: The data used as input when the surface was created, as described + * above at "struct drm_vmw_gb_surface_create_ext_req" + * @crep: Additional data output when the surface was created, as described + * above at "struct drm_vmw_gb_surface_create_rep" + * + * Output Argument to the DRM_VMW_GB_SURFACE_REF_EXT ioctl. + */ +struct drm_vmw_gb_surface_ref_ext_rep { + struct drm_vmw_gb_surface_create_ext_req creq; + struct drm_vmw_gb_surface_create_rep crep; +}; + +/** + * union drm_vmw_gb_surface_reference_ext_arg + * + * @req: Input data as described above at "struct drm_vmw_surface_arg" + * @rep: Output data as described above at + * "struct drm_vmw_gb_surface_ref_ext_rep" + * + * Argument to the DRM_VMW_GB_SURFACE_REF Ioctl. + */ +union drm_vmw_gb_surface_reference_ext_arg { + struct drm_vmw_gb_surface_ref_ext_rep rep; + struct drm_vmw_surface_arg req; +}; + +/** + * struct drm_vmw_msg_arg + * + * @send: Pointer to user-space msg string (null terminated). + * @receive: Pointer to user-space receive buffer. + * @send_only: Boolean whether this is only sending or receiving too. + * + * Argument to the DRM_VMW_MSG ioctl. + */ +struct drm_vmw_msg_arg { + __u64 send; + __u64 receive; + __s32 send_only; + __u32 receive_len; +}; + +/** + * struct drm_vmw_mksstat_add_arg + * + * @stat: Pointer to user-space stat-counters array, page-aligned. + * @info: Pointer to user-space counter-infos array, page-aligned. + * @strs: Pointer to user-space stat strings, page-aligned. + * @stat_len: Length in bytes of stat-counters array. + * @info_len: Length in bytes of counter-infos array. + * @strs_len: Length in bytes of the stat strings, terminators included. + * @description: Pointer to instance descriptor string; will be truncated + * to MKS_GUEST_STAT_INSTANCE_DESC_LENGTH chars. + * @id: Output identifier of the produced record; -1 if error. + * + * Argument to the DRM_VMW_MKSSTAT_ADD ioctl. + */ +struct drm_vmw_mksstat_add_arg { + __u64 stat; + __u64 info; + __u64 strs; + __u64 stat_len; + __u64 info_len; + __u64 strs_len; + __u64 description; + __u64 id; +}; + +/** + * struct drm_vmw_mksstat_remove_arg + * + * @id: Identifier of the record being disposed, originally obtained through + * DRM_VMW_MKSSTAT_ADD ioctl. + * + * Argument to the DRM_VMW_MKSSTAT_REMOVE ioctl. + */ +struct drm_vmw_mksstat_remove_arg { + __u64 id; +}; + +#if defined(__cplusplus) +} +#endif + +#endif diff --git a/include/arch/x86_64/drm/xe_drm.h b/include/arch/x86_64/drm/xe_drm.h new file mode 100644 index 00000000..e170d120 --- /dev/null +++ b/include/arch/x86_64/drm/xe_drm.h @@ -0,0 +1,2019 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef _XE_DRM_H_ +#define _XE_DRM_H_ + +#include "drm.h" + +#if defined(__cplusplus) +extern "C" { +#endif + +/* + * Please note that modifications to all structs defined here are + * subject to backwards-compatibility constraints. + * Sections in this file are organized as follows: + * 1. IOCTL definition + * 2. Extension definition and helper structs + * 3. IOCTL's Query structs in the order of the Query's entries. + * 4. The rest of IOCTL structs in the order of IOCTL declaration. + */ + +/** + * DOC: Xe Device Block Diagram + * + * The diagram below represents a high-level simplification of a discrete + * GPU supported by the Xe driver. It shows some device components which + * are necessary to understand this API, as well as how their relations + * to each other. This diagram does not represent real hardware:: + * + * ┌──────────────────────────────────────────────────────────────────┐ + * │ ┌──────────────────────────────────────────────────┐ ┌─────────┐ │ + * │ │ ┌───────────────────────┐ ┌─────┐ │ │ ┌─────┐ │ │ + * │ │ │ VRAM0 ├───┤ ... │ │ │ │VRAM1│ │ │ + * │ │ └───────────┬───────────┘ └─GT1─┘ │ │ └──┬──┘ │ │ + * │ │ ┌──────────────────┴───────────────────────────┐ │ │ ┌──┴──┐ │ │ + * │ │ │ ┌─────────────────────┐ ┌─────────────────┐ │ │ │ │ │ │ │ + * │ │ │ │ ┌──┐ ┌──┐ ┌──┐ ┌──┐ │ │ ┌─────┐ ┌─────┐ │ │ │ │ │ │ │ │ + * │ │ │ │ │EU│ │EU│ │EU│ │EU│ │ │ │RCS0 │ │BCS0 │ │ │ │ │ │ │ │ │ + * │ │ │ │ └──┘ └──┘ └──┘ └──┘ │ │ └─────┘ └─────┘ │ │ │ │ │ │ │ │ + * │ │ │ │ ┌──┐ ┌──┐ ┌──┐ ┌──┐ │ │ ┌─────┐ ┌─────┐ │ │ │ │ │ │ │ │ + * │ │ │ │ │EU│ │EU│ │EU│ │EU│ │ │ │VCS0 │ │VCS1 │ │ │ │ │ │ │ │ │ + * │ │ │ │ └──┘ └──┘ └──┘ └──┘ │ │ └─────┘ └─────┘ │ │ │ │ │ │ │ │ + * │ │ │ │ ┌──┐ ┌──┐ ┌──┐ ┌──┐ │ │ ┌─────┐ ┌─────┐ │ │ │ │ │ │ │ │ + * │ │ │ │ │EU│ │EU│ │EU│ │EU│ │ │ │VECS0│ │VECS1│ │ │ │ │ │ ... │ │ │ + * │ │ │ │ └──┘ └──┘ └──┘ └──┘ │ │ └─────┘ └─────┘ │ │ │ │ │ │ │ │ + * │ │ │ │ ┌──┐ ┌──┐ ┌──┐ ┌──┐ │ │ ┌─────┐ ┌─────┐ │ │ │ │ │ │ │ │ + * │ │ │ │ │EU│ │EU│ │EU│ │EU│ │ │ │CCS0 │ │CCS1 │ │ │ │ │ │ │ │ │ + * │ │ │ │ └──┘ └──┘ └──┘ └──┘ │ │ └─────┘ └─────┘ │ │ │ │ │ │ │ │ + * │ │ │ └─────────DSS─────────┘ │ ┌─────┐ ┌─────┐ │ │ │ │ │ │ │ │ + * │ │ │ │ │CCS2 │ │CCS3 │ │ │ │ │ │ │ │ │ + * │ │ │ ┌─────┐ ┌─────┐ ┌─────┐ │ └─────┘ └─────┘ │ │ │ │ │ │ │ │ + * │ │ │ │ ... │ │ ... │ │ ... │ │ │ │ │ │ │ │ │ │ + * │ │ │ └─DSS─┘ └─DSS─┘ └─DSS─┘ └─────Engines─────┘ │ │ │ │ │ │ │ + * │ │ └───────────────────────────GT0────────────────┘ │ │ └─GT2─┘ │ │ + * │ └────────────────────────────Tile0─────────────────┘ └─ Tile1──┘ │ + * └─────────────────────────────Device0───────┬──────────────────────┘ + * │ + * ───────────────────────┴────────── PCI bus + */ + +/** + * DOC: Xe uAPI Overview + * + * This section aims to describe the Xe's IOCTL entries, its structs, and other + * Xe related uAPI such as uevents and PMU (Platform Monitoring Unit) related + * entries and usage. + * + * List of supported IOCTLs: + * - &DRM_IOCTL_XE_DEVICE_QUERY + * - &DRM_IOCTL_XE_GEM_CREATE + * - &DRM_IOCTL_XE_GEM_MMAP_OFFSET + * - &DRM_IOCTL_XE_VM_CREATE + * - &DRM_IOCTL_XE_VM_DESTROY + * - &DRM_IOCTL_XE_VM_BIND + * - &DRM_IOCTL_XE_EXEC_QUEUE_CREATE + * - &DRM_IOCTL_XE_EXEC_QUEUE_DESTROY + * - &DRM_IOCTL_XE_EXEC_QUEUE_GET_PROPERTY + * - &DRM_IOCTL_XE_EXEC + * - &DRM_IOCTL_XE_WAIT_USER_FENCE + * - &DRM_IOCTL_XE_OBSERVATION + */ + +/* + * xe specific ioctls. + * + * The device specific ioctl range is [DRM_COMMAND_BASE, DRM_COMMAND_END) ie + * [0x40, 0xa0) (a0 is excluded). The numbers below are defined as offset + * against DRM_COMMAND_BASE and should be between [0x0, 0x60). + */ +#define DRM_XE_DEVICE_QUERY 0x00 +#define DRM_XE_GEM_CREATE 0x01 +#define DRM_XE_GEM_MMAP_OFFSET 0x02 +#define DRM_XE_VM_CREATE 0x03 +#define DRM_XE_VM_DESTROY 0x04 +#define DRM_XE_VM_BIND 0x05 +#define DRM_XE_EXEC_QUEUE_CREATE 0x06 +#define DRM_XE_EXEC_QUEUE_DESTROY 0x07 +#define DRM_XE_EXEC_QUEUE_GET_PROPERTY 0x08 +#define DRM_XE_EXEC 0x09 +#define DRM_XE_WAIT_USER_FENCE 0x0a +#define DRM_XE_OBSERVATION 0x0b + +/* Must be kept compact -- no holes */ + +#define DRM_IOCTL_XE_DEVICE_QUERY \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_DEVICE_QUERY, \ + struct drm_xe_device_query) +#define DRM_IOCTL_XE_GEM_CREATE \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_GEM_CREATE, struct drm_xe_gem_create) +#define DRM_IOCTL_XE_GEM_MMAP_OFFSET \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_GEM_MMAP_OFFSET, \ + struct drm_xe_gem_mmap_offset) +#define DRM_IOCTL_XE_VM_CREATE \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_VM_CREATE, struct drm_xe_vm_create) +#define DRM_IOCTL_XE_VM_DESTROY \ + DRM_IOW(DRM_COMMAND_BASE + DRM_XE_VM_DESTROY, struct drm_xe_vm_destroy) +#define DRM_IOCTL_XE_VM_BIND \ + DRM_IOW(DRM_COMMAND_BASE + DRM_XE_VM_BIND, struct drm_xe_vm_bind) +#define DRM_IOCTL_XE_EXEC_QUEUE_CREATE \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_CREATE, \ + struct drm_xe_exec_queue_create) +#define DRM_IOCTL_XE_EXEC_QUEUE_DESTROY \ + DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_DESTROY, \ + struct drm_xe_exec_queue_destroy) +#define DRM_IOCTL_XE_EXEC_QUEUE_GET_PROPERTY \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_GET_PROPERTY, \ + struct drm_xe_exec_queue_get_property) +#define DRM_IOCTL_XE_EXEC \ + DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC, struct drm_xe_exec) +#define DRM_IOCTL_XE_WAIT_USER_FENCE \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_WAIT_USER_FENCE, \ + struct drm_xe_wait_user_fence) +#define DRM_IOCTL_XE_OBSERVATION \ + DRM_IOW(DRM_COMMAND_BASE + DRM_XE_OBSERVATION, \ + struct drm_xe_observation_param) + +/** + * DOC: Xe IOCTL Extensions + * + * Before detailing the IOCTLs and its structs, it is important to highlight + * that every IOCTL in Xe is extensible. + * + * Many interfaces need to grow over time. In most cases we can simply + * extend the struct and have userspace pass in more data. Another option, + * as demonstrated by Vulkan's approach to providing extensions for forward + * and backward compatibility, is to use a list of optional structs to + * provide those extra details. + * + * The key advantage to using an extension chain is that it allows us to + * redefine the interface more easily than an ever growing struct of + * increasing complexity, and for large parts of that interface to be + * entirely optional. The downside is more pointer chasing; chasing across + * the boundary with pointers encapsulated inside u64. + * + * Example chaining: + * + * .. code-block:: C + * + * struct drm_xe_user_extension ext3 { + * .next_extension = 0, // end + * .name = ..., + * }; + * struct drm_xe_user_extension ext2 { + * .next_extension = (uintptr_t)&ext3, + * .name = ..., + * }; + * struct drm_xe_user_extension ext1 { + * .next_extension = (uintptr_t)&ext2, + * .name = ..., + * }; + * + * Typically the struct drm_xe_user_extension would be embedded in some uAPI + * struct, and in this case we would feed it the head of the chain(i.e ext1), + * which would then apply all of the above extensions. + */ + +/** + * struct drm_xe_user_extension - Base class for defining a chain of extensions + */ +struct drm_xe_user_extension { + /** + * @next_extension: + * + * Pointer to the next struct drm_xe_user_extension, or zero if the end. + */ + __u64 next_extension; + + /** + * @name: Name of the extension. + * + * Note that the name here is just some integer. + * + * Also note that the name space for this is not global for the whole + * driver, but rather its scope/meaning is limited to the specific piece + * of uAPI which has embedded the struct drm_xe_user_extension. + */ + __u32 name; + + /** + * @pad: MBZ + * + * All undefined bits must be zero. + */ + __u32 pad; +}; + +/** + * struct drm_xe_ext_set_property - Generic set property extension + * + * A generic struct that allows any of the Xe's IOCTL to be extended + * with a set_property operation. + */ +struct drm_xe_ext_set_property { + /** @base: base user extension */ + struct drm_xe_user_extension base; + + /** @property: property to set */ + __u32 property; + + /** @pad: MBZ */ + __u32 pad; + + /** @value: property value */ + __u64 value; + + /** @reserved: Reserved */ + __u64 reserved[2]; +}; + +/** + * struct drm_xe_engine_class_instance - instance of an engine class + * + * It is returned as part of the @drm_xe_engine, but it also is used as + * the input of engine selection for both @drm_xe_exec_queue_create and + * @drm_xe_query_engine_cycles + * + * The @engine_class can be: + * - %DRM_XE_ENGINE_CLASS_RENDER + * - %DRM_XE_ENGINE_CLASS_COPY + * - %DRM_XE_ENGINE_CLASS_VIDEO_DECODE + * - %DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE + * - %DRM_XE_ENGINE_CLASS_COMPUTE + * - %DRM_XE_ENGINE_CLASS_VM_BIND - Kernel only classes (not actual + * hardware engine class). Used for creating ordered queues of VM + * bind operations. + */ +struct drm_xe_engine_class_instance { +#define DRM_XE_ENGINE_CLASS_RENDER 0 +#define DRM_XE_ENGINE_CLASS_COPY 1 +#define DRM_XE_ENGINE_CLASS_VIDEO_DECODE 2 +#define DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE 3 +#define DRM_XE_ENGINE_CLASS_COMPUTE 4 +#define DRM_XE_ENGINE_CLASS_VM_BIND 5 + /** @engine_class: engine class id */ + __u16 engine_class; + /** @engine_instance: engine instance id */ + __u16 engine_instance; + /** @gt_id: Unique ID of this GT within the PCI Device */ + __u16 gt_id; + /** @pad: MBZ */ + __u16 pad; +}; + +/** + * struct drm_xe_engine - describe hardware engine + */ +struct drm_xe_engine { + /** @instance: The @drm_xe_engine_class_instance */ + struct drm_xe_engine_class_instance instance; + + /** @reserved: Reserved */ + __u64 reserved[3]; +}; + +/** + * struct drm_xe_query_engines - describe engines + * + * If a query is made with a struct @drm_xe_device_query where .query + * is equal to %DRM_XE_DEVICE_QUERY_ENGINES, then the reply uses an array of + * struct @drm_xe_query_engines in .data. + */ +struct drm_xe_query_engines { + /** @num_engines: number of engines returned in @engines */ + __u32 num_engines; + /** @pad: MBZ */ + __u32 pad; + /** @engines: The returned engines for this device */ + struct drm_xe_engine engines[]; +}; + +/** + * enum drm_xe_memory_class - Supported memory classes. + */ +enum drm_xe_memory_class { + /** @DRM_XE_MEM_REGION_CLASS_SYSMEM: Represents system memory. */ + DRM_XE_MEM_REGION_CLASS_SYSMEM = 0, + /** + * @DRM_XE_MEM_REGION_CLASS_VRAM: On discrete platforms, this + * represents the memory that is local to the device, which we + * call VRAM. Not valid on integrated platforms. + */ + DRM_XE_MEM_REGION_CLASS_VRAM +}; + +/** + * struct drm_xe_mem_region - Describes some region as known to + * the driver. + */ +struct drm_xe_mem_region { + /** + * @mem_class: The memory class describing this region. + * + * See enum drm_xe_memory_class for supported values. + */ + __u16 mem_class; + /** + * @instance: The unique ID for this region, which serves as the + * index in the placement bitmask used as argument for + * &DRM_IOCTL_XE_GEM_CREATE + */ + __u16 instance; + /** + * @min_page_size: Min page-size in bytes for this region. + * + * When the kernel allocates memory for this region, the + * underlying pages will be at least @min_page_size in size. + * Buffer objects with an allowable placement in this region must be + * created with a size aligned to this value. + * GPU virtual address mappings of (parts of) buffer objects that + * may be placed in this region must also have their GPU virtual + * address and range aligned to this value. + * Affected IOCTLS will return %-EINVAL if alignment restrictions are + * not met. + */ + __u32 min_page_size; + /** + * @total_size: The usable size in bytes for this region. + */ + __u64 total_size; + /** + * @used: Estimate of the memory used in bytes for this region. + * + * Requires CAP_PERFMON or CAP_SYS_ADMIN to get reliable + * accounting. Without this the value here will always equal + * zero. + */ + __u64 used; + /** + * @cpu_visible_size: How much of this region can be CPU + * accessed, in bytes. + * + * This will always be <= @total_size, and the remainder (if + * any) will not be CPU accessible. If the CPU accessible part + * is smaller than @total_size then this is referred to as a + * small BAR system. + * + * On systems without small BAR (full BAR), the probed_size will + * always equal the @total_size, since all of it will be CPU + * accessible. + * + * Note this is only tracked for DRM_XE_MEM_REGION_CLASS_VRAM + * regions (for other types the value here will always equal + * zero). + */ + __u64 cpu_visible_size; + /** + * @cpu_visible_used: Estimate of CPU visible memory used, in + * bytes. + * + * Requires CAP_PERFMON or CAP_SYS_ADMIN to get reliable + * accounting. Without this the value here will always equal + * zero. Note this is only currently tracked for + * DRM_XE_MEM_REGION_CLASS_VRAM regions (for other types the value + * here will always be zero). + */ + __u64 cpu_visible_used; + /** @reserved: Reserved */ + __u64 reserved[6]; +}; + +/** + * struct drm_xe_query_mem_regions - describe memory regions + * + * If a query is made with a struct drm_xe_device_query where .query + * is equal to DRM_XE_DEVICE_QUERY_MEM_REGIONS, then the reply uses + * struct drm_xe_query_mem_regions in .data. + */ +struct drm_xe_query_mem_regions { + /** @num_mem_regions: number of memory regions returned in @mem_regions + */ + __u32 num_mem_regions; + /** @pad: MBZ */ + __u32 pad; + /** @mem_regions: The returned memory regions for this device */ + struct drm_xe_mem_region mem_regions[]; +}; + +/** + * struct drm_xe_query_config - describe the device configuration + * + * If a query is made with a struct drm_xe_device_query where .query + * is equal to DRM_XE_DEVICE_QUERY_CONFIG, then the reply uses + * struct drm_xe_query_config in .data. + * + * The index in @info can be: + * - %DRM_XE_QUERY_CONFIG_REV_AND_DEVICE_ID - Device ID (lower 16 bits) + * and the device revision (next 8 bits) + * - %DRM_XE_QUERY_CONFIG_FLAGS - Flags describing the device + * configuration, see list below + * + * - %DRM_XE_QUERY_CONFIG_FLAG_HAS_VRAM - Flag is set if the device + * has usable VRAM + * - %DRM_XE_QUERY_CONFIG_FLAG_HAS_LOW_LATENCY - Flag is set if the device + * has low latency hint support + * - %DRM_XE_QUERY_CONFIG_FLAG_HAS_CPU_ADDR_MIRROR - Flag is set if the + * device has CPU address mirroring support + * - %DRM_XE_QUERY_CONFIG_MIN_ALIGNMENT - Minimal memory alignment + * required by this device, typically SZ_4K or SZ_64K + * - %DRM_XE_QUERY_CONFIG_VA_BITS - Maximum bits of a virtual address + * - %DRM_XE_QUERY_CONFIG_MAX_EXEC_QUEUE_PRIORITY - Value of the highest + * available exec queue priority + */ +struct drm_xe_query_config { + /** @num_params: number of parameters returned in info */ + __u32 num_params; + + /** @pad: MBZ */ + __u32 pad; + +#define DRM_XE_QUERY_CONFIG_REV_AND_DEVICE_ID 0 +#define DRM_XE_QUERY_CONFIG_FLAGS 1 +#define DRM_XE_QUERY_CONFIG_FLAG_HAS_VRAM (1 << 0) +#define DRM_XE_QUERY_CONFIG_FLAG_HAS_LOW_LATENCY (1 << 1) +#define DRM_XE_QUERY_CONFIG_FLAG_HAS_CPU_ADDR_MIRROR (1 << 2) +#define DRM_XE_QUERY_CONFIG_MIN_ALIGNMENT 2 +#define DRM_XE_QUERY_CONFIG_VA_BITS 3 +#define DRM_XE_QUERY_CONFIG_MAX_EXEC_QUEUE_PRIORITY 4 + /** @info: array of elements containing the config info */ + __u64 info[]; +}; + +/** + * struct drm_xe_gt - describe an individual GT. + * + * To be used with drm_xe_query_gt_list, which will return a list with all the + * existing GT individual descriptions. + * Graphics Technology (GT) is a subset of a GPU/tile that is responsible for + * implementing graphics and/or media operations. + * + * The index in @type can be: + * - %DRM_XE_QUERY_GT_TYPE_MAIN + * - %DRM_XE_QUERY_GT_TYPE_MEDIA + */ +struct drm_xe_gt { +#define DRM_XE_QUERY_GT_TYPE_MAIN 0 +#define DRM_XE_QUERY_GT_TYPE_MEDIA 1 + /** @type: GT type: Main or Media */ + __u16 type; + /** @tile_id: Tile ID where this GT lives (Information only) */ + __u16 tile_id; + /** @gt_id: Unique ID of this GT within the PCI Device */ + __u16 gt_id; + /** @pad: MBZ */ + __u16 pad[3]; + /** @reference_clock: A clock frequency for timestamp */ + __u32 reference_clock; + /** + * @near_mem_regions: Bit mask of instances from + * drm_xe_query_mem_regions that are nearest to the current engines + * of this GT. + * Each index in this mask refers directly to the struct + * drm_xe_query_mem_regions' instance, no assumptions should + * be made about order. The type of each region is described + * by struct drm_xe_query_mem_regions' mem_class. + */ + __u64 near_mem_regions; + /** + * @far_mem_regions: Bit mask of instances from + * drm_xe_query_mem_regions that are far from the engines of this GT. + * In general, they have extra indirections when compared to the + * @near_mem_regions. For a discrete device this could mean system + * memory and memory living in a different tile. + * Each index in this mask refers directly to the struct + * drm_xe_query_mem_regions' instance, no assumptions should + * be made about order. The type of each region is described + * by struct drm_xe_query_mem_regions' mem_class. + */ + __u64 far_mem_regions; + /** @ip_ver_major: Graphics/media IP major version on GMD_ID platforms + */ + __u16 ip_ver_major; + /** @ip_ver_minor: Graphics/media IP minor version on GMD_ID platforms + */ + __u16 ip_ver_minor; + /** @ip_ver_rev: Graphics/media IP revision version on GMD_ID platforms + */ + __u16 ip_ver_rev; + /** @pad2: MBZ */ + __u16 pad2; + /** @reserved: Reserved */ + __u64 reserved[7]; +}; + +/** + * struct drm_xe_query_gt_list - A list with GT description items. + * + * If a query is made with a struct drm_xe_device_query where .query + * is equal to DRM_XE_DEVICE_QUERY_GT_LIST, then the reply uses struct + * drm_xe_query_gt_list in .data. + */ +struct drm_xe_query_gt_list { + /** @num_gt: number of GT items returned in gt_list */ + __u32 num_gt; + /** @pad: MBZ */ + __u32 pad; + /** @gt_list: The GT list returned for this device */ + struct drm_xe_gt gt_list[]; +}; + +/** + * struct drm_xe_query_topology_mask - describe the topology mask of a GT + * + * This is the hardware topology which reflects the internal physical + * structure of the GPU. + * + * If a query is made with a struct drm_xe_device_query where .query + * is equal to DRM_XE_DEVICE_QUERY_GT_TOPOLOGY, then the reply uses + * struct drm_xe_query_topology_mask in .data. + * + * The @type can be: + * - %DRM_XE_TOPO_DSS_GEOMETRY - To query the mask of Dual Sub Slices + * (DSS) available for geometry operations. For example a query response + * containing the following in mask: + * ``DSS_GEOMETRY ff ff ff ff 00 00 00 00`` + * means 32 DSS are available for geometry. + * - %DRM_XE_TOPO_DSS_COMPUTE - To query the mask of Dual Sub Slices + * (DSS) available for compute operations. For example a query response + * containing the following in mask: + * ``DSS_COMPUTE ff ff ff ff 00 00 00 00`` + * means 32 DSS are available for compute. + * - %DRM_XE_TOPO_L3_BANK - To query the mask of enabled L3 banks. This type + * may be omitted if the driver is unable to query the mask from the + * hardware. + * - %DRM_XE_TOPO_EU_PER_DSS - To query the mask of Execution Units (EU) + * available per Dual Sub Slices (DSS). For example a query response + * containing the following in mask: + * ``EU_PER_DSS ff ff 00 00 00 00 00 00`` + * means each DSS has 16 SIMD8 EUs. This type may be omitted if device + * doesn't have SIMD8 EUs. + * - %DRM_XE_TOPO_SIMD16_EU_PER_DSS - To query the mask of SIMD16 Execution + * Units (EU) available per Dual Sub Slices (DSS). For example a query + * response containing the following in mask: + * ``SIMD16_EU_PER_DSS ff ff 00 00 00 00 00 00`` + * means each DSS has 16 SIMD16 EUs. This type may be omitted if device + * doesn't have SIMD16 EUs. + */ +struct drm_xe_query_topology_mask { + /** @gt_id: GT ID the mask is associated with */ + __u16 gt_id; + +#define DRM_XE_TOPO_DSS_GEOMETRY 1 +#define DRM_XE_TOPO_DSS_COMPUTE 2 +#define DRM_XE_TOPO_L3_BANK 3 +#define DRM_XE_TOPO_EU_PER_DSS 4 +#define DRM_XE_TOPO_SIMD16_EU_PER_DSS 5 + /** @type: type of mask */ + __u16 type; + + /** @num_bytes: number of bytes in requested mask */ + __u32 num_bytes; + + /** @mask: little-endian mask of @num_bytes */ + __u8 mask[]; +}; + +/** + * struct drm_xe_query_engine_cycles - correlate CPU and GPU timestamps + * + * If a query is made with a struct drm_xe_device_query where .query is equal to + * DRM_XE_DEVICE_QUERY_ENGINE_CYCLES, then the reply uses struct + * drm_xe_query_engine_cycles in .data. struct drm_xe_query_engine_cycles is + * allocated by the user and .data points to this allocated structure. + * + * The query returns the engine cycles, which along with GT's @reference_clock, + * can be used to calculate the engine timestamp. In addition the + * query returns a set of cpu timestamps that indicate when the command + * streamer cycle count was captured. + */ +struct drm_xe_query_engine_cycles { + /** + * @eci: This is input by the user and is the engine for which command + * streamer cycles is queried. + */ + struct drm_xe_engine_class_instance eci; + + /** + * @clockid: This is input by the user and is the reference clock id for + * CPU timestamp. For definition, see clock_gettime(2) and + * perf_event_open(2). Supported clock ids are CLOCK_MONOTONIC, + * CLOCK_MONOTONIC_RAW, CLOCK_REALTIME, CLOCK_BOOTTIME, CLOCK_TAI. + */ + __s32 clockid; + + /** @width: Width of the engine cycle counter in bits. */ + __u32 width; + + /** + * @engine_cycles: Engine cycles as read from its register + * at 0x358 offset. + */ + __u64 engine_cycles; + + /** + * @cpu_timestamp: CPU timestamp in ns. The timestamp is captured before + * reading the engine_cycles register using the reference clockid set by + * the user. + */ + __u64 cpu_timestamp; + + /** + * @cpu_delta: Time delta in ns captured around reading the lower dword + * of the engine_cycles register. + */ + __u64 cpu_delta; +}; + +/** + * struct drm_xe_query_uc_fw_version - query a micro-controller firmware version + * + * Given a uc_type this will return the branch, major, minor and patch version + * of the micro-controller firmware. + */ +struct drm_xe_query_uc_fw_version { + /** @uc_type: The micro-controller type to query firmware version */ +#define XE_QUERY_UC_TYPE_GUC_SUBMISSION 0 +#define XE_QUERY_UC_TYPE_HUC 1 + __u16 uc_type; + + /** @pad: MBZ */ + __u16 pad; + + /** @branch_ver: branch uc fw version */ + __u32 branch_ver; + /** @major_ver: major uc fw version */ + __u32 major_ver; + /** @minor_ver: minor uc fw version */ + __u32 minor_ver; + /** @patch_ver: patch uc fw version */ + __u32 patch_ver; + + /** @pad2: MBZ */ + __u32 pad2; + + /** @reserved: Reserved */ + __u64 reserved; +}; + +/** + * struct drm_xe_query_pxp_status - query if PXP is ready + * + * If PXP is enabled and no fatal error has occurred, the status will be set to + * one of the following values: + * 0: PXP init still in progress + * 1: PXP init complete + * + * If PXP is not enabled or something has gone wrong, the query will be failed + * with one of the following error codes: + * -ENODEV: PXP not supported or disabled; + * -EIO: fatal error occurred during init, so PXP will never be enabled; + * -EINVAL: incorrect value provided as part of the query; + * -EFAULT: error copying the memory between kernel and userspace. + * + * The status can only be 0 in the first few seconds after driver load. If + * everything works as expected, the status will transition to init complete in + * less than 1 second, while in case of errors the driver might take longer to + * start returning an error code, but it should still take less than 10 seconds. + * + * The supported session type bitmask is based on the values in + * enum drm_xe_pxp_session_type. TYPE_NONE is always supported and therefore + * is not reported in the bitmask. + * + */ +struct drm_xe_query_pxp_status { + /** @status: current PXP status */ + __u32 status; + + /** @supported_session_types: bitmask of supported PXP session types */ + __u32 supported_session_types; +}; + +/** + * struct drm_xe_device_query - Input of &DRM_IOCTL_XE_DEVICE_QUERY - main + * structure to query device information + * + * The user selects the type of data to query among DRM_XE_DEVICE_QUERY_* + * and sets the value in the query member. This determines the type of + * the structure provided by the driver in data, among struct drm_xe_query_*. + * + * The @query can be: + * - %DRM_XE_DEVICE_QUERY_ENGINES + * - %DRM_XE_DEVICE_QUERY_MEM_REGIONS + * - %DRM_XE_DEVICE_QUERY_CONFIG + * - %DRM_XE_DEVICE_QUERY_GT_LIST + * - %DRM_XE_DEVICE_QUERY_HWCONFIG - Query type to retrieve the hardware + * configuration of the device such as information on slices, memory, + * caches, and so on. It is provided as a table of key / value + * attributes. + * - %DRM_XE_DEVICE_QUERY_GT_TOPOLOGY + * - %DRM_XE_DEVICE_QUERY_ENGINE_CYCLES + * - %DRM_XE_DEVICE_QUERY_PXP_STATUS + * + * If size is set to 0, the driver fills it with the required size for + * the requested type of data to query. If size is equal to the required + * size, the queried information is copied into data. If size is set to + * a value different from 0 and different from the required size, the + * IOCTL call returns -EINVAL. + * + * For example the following code snippet allows retrieving and printing + * information about the device engines with DRM_XE_DEVICE_QUERY_ENGINES: + * + * .. code-block:: C + * + * struct drm_xe_query_engines *engines; + * struct drm_xe_device_query query = { + * .extensions = 0, + * .query = DRM_XE_DEVICE_QUERY_ENGINES, + * .size = 0, + * .data = 0, + * }; + * ioctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query); + * engines = malloc(query.size); + * query.data = (uintptr_t)engines; + * ioctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query); + * for (int i = 0; i < engines->num_engines; i++) { + * printf("Engine %d: %s\n", i, + * engines->engines[i].instance.engine_class == + * DRM_XE_ENGINE_CLASS_RENDER ? "RENDER": + * engines->engines[i].instance.engine_class == + * DRM_XE_ENGINE_CLASS_COPY ? "COPY": + * engines->engines[i].instance.engine_class == + * DRM_XE_ENGINE_CLASS_VIDEO_DECODE ? "VIDEO_DECODE": + * engines->engines[i].instance.engine_class == + * DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE ? "VIDEO_ENHANCE": + * engines->engines[i].instance.engine_class == + * DRM_XE_ENGINE_CLASS_COMPUTE ? "COMPUTE": + * "UNKNOWN"); + * } + * free(engines); + */ +struct drm_xe_device_query { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + +#define DRM_XE_DEVICE_QUERY_ENGINES 0 +#define DRM_XE_DEVICE_QUERY_MEM_REGIONS 1 +#define DRM_XE_DEVICE_QUERY_CONFIG 2 +#define DRM_XE_DEVICE_QUERY_GT_LIST 3 +#define DRM_XE_DEVICE_QUERY_HWCONFIG 4 +#define DRM_XE_DEVICE_QUERY_GT_TOPOLOGY 5 +#define DRM_XE_DEVICE_QUERY_ENGINE_CYCLES 6 +#define DRM_XE_DEVICE_QUERY_UC_FW_VERSION 7 +#define DRM_XE_DEVICE_QUERY_OA_UNITS 8 +#define DRM_XE_DEVICE_QUERY_PXP_STATUS 9 +#define DRM_XE_DEVICE_QUERY_EU_STALL 10 + /** @query: The type of data to query */ + __u32 query; + + /** @size: Size of the queried data */ + __u32 size; + + /** @data: Queried data is placed here */ + __u64 data; + + /** @reserved: Reserved */ + __u64 reserved[2]; +}; + +/** + * struct drm_xe_gem_create - Input of &DRM_IOCTL_XE_GEM_CREATE - A structure + * for gem creation + * + * The @flags can be: + * - %DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING + * - %DRM_XE_GEM_CREATE_FLAG_SCANOUT + * - %DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM - When using VRAM as a + * possible placement, ensure that the corresponding VRAM allocation + * will always use the CPU accessible part of VRAM. This is important + * for small-bar systems (on full-bar systems this gets turned into a + * noop). + * Note1: System memory can be used as an extra placement if the kernel + * should spill the allocation to system memory, if space can't be made + * available in the CPU accessible part of VRAM (giving the same + * behaviour as the i915 interface, see + * I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS). + * Note2: For clear-color CCS surfaces the kernel needs to read the + * clear-color value stored in the buffer, and on discrete platforms we + * need to use VRAM for display surfaces, therefore the kernel requires + * setting this flag for such objects, otherwise an error is thrown on + * small-bar systems. + * + * @cpu_caching supports the following values: + * - %DRM_XE_GEM_CPU_CACHING_WB - Allocate the pages with write-back + * caching. On iGPU this can't be used for scanout surfaces. Currently + * not allowed for objects placed in VRAM. + * - %DRM_XE_GEM_CPU_CACHING_WC - Allocate the pages as write-combined. This + * is uncached. Scanout surfaces should likely use this. All objects + * that can be placed in VRAM must use this. + * + * This ioctl supports setting the following properties via the + * %DRM_XE_GEM_CREATE_EXTENSION_SET_PROPERTY extension, which uses the + * generic @drm_xe_ext_set_property struct: + * + * - %DRM_XE_GEM_CREATE_SET_PROPERTY_PXP_TYPE - set the type of PXP session + * this object will be used with. Valid values are listed in enum + * drm_xe_pxp_session_type. %DRM_XE_PXP_TYPE_NONE is the default behavior, so + * there is no need to explicitly set that. Objects used with session of type + * %DRM_XE_PXP_TYPE_HWDRM will be marked as invalid if a PXP invalidation + * event occurs after their creation. Attempting to flip an invalid object + * will cause a black frame to be displayed instead. Submissions with invalid + * objects mapped in the VM will be rejected. + */ +struct drm_xe_gem_create { +#define DRM_XE_GEM_CREATE_EXTENSION_SET_PROPERTY 0 +#define DRM_XE_GEM_CREATE_SET_PROPERTY_PXP_TYPE 0 + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** + * @size: Size of the object to be created, must match region + * (system or vram) minimum alignment (&min_page_size). + */ + __u64 size; + + /** + * @placement: A mask of memory instances of where BO can be placed. + * Each index in this mask refers directly to the struct + * drm_xe_query_mem_regions' instance, no assumptions should + * be made about order. The type of each region is described + * by struct drm_xe_query_mem_regions' mem_class. + */ + __u32 placement; + +#define DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING (1 << 0) +#define DRM_XE_GEM_CREATE_FLAG_SCANOUT (1 << 1) +#define DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM (1 << 2) + /** + * @flags: Flags, currently a mask of memory instances of where BO can + * be placed + */ + __u32 flags; + + /** + * @vm_id: Attached VM, if any + * + * If a VM is specified, this BO must: + * + * 1. Only ever be bound to that VM. + * 2. Cannot be exported as a PRIME fd. + */ + __u32 vm_id; + + /** + * @handle: Returned handle for the object. + * + * Object handles are nonzero. + */ + __u32 handle; + +#define DRM_XE_GEM_CPU_CACHING_WB 1 +#define DRM_XE_GEM_CPU_CACHING_WC 2 + /** + * @cpu_caching: The CPU caching mode to select for this object. If + * mmaping the object the mode selected here will also be used. The + * exception is when mapping system memory (including data evicted + * to system) on discrete GPUs. The caching mode selected will + * then be overridden to DRM_XE_GEM_CPU_CACHING_WB, and coherency + * between GPU- and CPU is guaranteed. The caching mode of + * existing CPU-mappings will be updated transparently to + * user-space clients. + */ + __u16 cpu_caching; + /** @pad: MBZ */ + __u16 pad[3]; + + /** @reserved: Reserved */ + __u64 reserved[2]; +}; + +/** + * struct drm_xe_gem_mmap_offset - Input of &DRM_IOCTL_XE_GEM_MMAP_OFFSET + * + * The @flags can be: + * - %DRM_XE_MMAP_OFFSET_FLAG_PCI_BARRIER - For user to query special offset + * for use in mmap ioctl. Writing to the returned mmap address will generate + * a PCI memory barrier with low overhead (avoiding IOCTL call as well as + * writing to VRAM which would also add overhead), acting like an MI_MEM_FENCE + * instruction. + * + * Note: The mmap size can be at most 4K, due to HW limitations. As a result + * this interface is only supported on CPU architectures that support 4K page + * size. The mmap_offset ioctl will detect this and gracefully return an + * error, where userspace is expected to have a different fallback method for + * triggering a barrier. + * + * Roughly the usage would be as follows: + * + * .. code-block:: C + * + * struct drm_xe_gem_mmap_offset mmo = { + * .handle = 0, // must be set to 0 + * .flags = DRM_XE_MMAP_OFFSET_FLAG_PCI_BARRIER, + * }; + * + * err = ioctl(fd, DRM_IOCTL_XE_GEM_MMAP_OFFSET, &mmo); + * map = mmap(NULL, size, PROT_WRITE, MAP_SHARED, fd, mmo.offset); + * map[i] = 0xdeadbeaf; // issue barrier + */ +struct drm_xe_gem_mmap_offset { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** @handle: Handle for the object being mapped. */ + __u32 handle; + +#define DRM_XE_MMAP_OFFSET_FLAG_PCI_BARRIER (1 << 0) + /** @flags: Flags */ + __u32 flags; + + /** @offset: The fake offset to use for subsequent mmap call */ + __u64 offset; + + /** @reserved: Reserved */ + __u64 reserved[2]; +}; + +/** + * struct drm_xe_vm_create - Input of &DRM_IOCTL_XE_VM_CREATE + * + * The @flags can be: + * - %DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE - Map the whole virtual address + * space of the VM to scratch page. A vm_bind would overwrite the scratch + * page mapping. This flag is mutually exclusive with the + * %DRM_XE_VM_CREATE_FLAG_FAULT_MODE flag, with an exception of on x2 and + * xe3 platform. + * - %DRM_XE_VM_CREATE_FLAG_LR_MODE - An LR, or Long Running VM accepts + * exec submissions to its exec_queues that don't have an upper time + * limit on the job execution time. But exec submissions to these + * don't allow any of the sync types DRM_XE_SYNC_TYPE_SYNCOBJ, + * DRM_XE_SYNC_TYPE_TIMELINE_SYNCOBJ, used as out-syncobjs, that is, + * together with sync flag DRM_XE_SYNC_FLAG_SIGNAL. + * LR VMs can be created in recoverable page-fault mode using + * DRM_XE_VM_CREATE_FLAG_FAULT_MODE, if the device supports it. + * If that flag is omitted, the UMD can not rely on the slightly + * different per-VM overcommit semantics that are enabled by + * DRM_XE_VM_CREATE_FLAG_FAULT_MODE (see below), but KMD may + * still enable recoverable pagefaults if supported by the device. + * - %DRM_XE_VM_CREATE_FLAG_FAULT_MODE - Requires also + * DRM_XE_VM_CREATE_FLAG_LR_MODE. It allows memory to be allocated on + * demand when accessed, and also allows per-VM overcommit of memory. + * The xe driver internally uses recoverable pagefaults to implement + * this. + */ +struct drm_xe_vm_create { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + +#define DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE (1 << 0) +#define DRM_XE_VM_CREATE_FLAG_LR_MODE (1 << 1) +#define DRM_XE_VM_CREATE_FLAG_FAULT_MODE (1 << 2) + /** @flags: Flags */ + __u32 flags; + + /** @vm_id: Returned VM ID */ + __u32 vm_id; + + /** @reserved: Reserved */ + __u64 reserved[2]; +}; + +/** + * struct drm_xe_vm_destroy - Input of &DRM_IOCTL_XE_VM_DESTROY + */ +struct drm_xe_vm_destroy { + /** @vm_id: VM ID */ + __u32 vm_id; + + /** @pad: MBZ */ + __u32 pad; + + /** @reserved: Reserved */ + __u64 reserved[2]; +}; + +/** + * struct drm_xe_vm_bind_op - run bind operations + * + * The @op can be: + * - %DRM_XE_VM_BIND_OP_MAP + * - %DRM_XE_VM_BIND_OP_UNMAP + * - %DRM_XE_VM_BIND_OP_MAP_USERPTR + * - %DRM_XE_VM_BIND_OP_UNMAP_ALL + * - %DRM_XE_VM_BIND_OP_PREFETCH + * + * and the @flags can be: + * - %DRM_XE_VM_BIND_FLAG_READONLY - Setup the page tables as read-only + * to ensure write protection + * - %DRM_XE_VM_BIND_FLAG_IMMEDIATE - On a faulting VM, do the + * MAP operation immediately rather than deferring the MAP to the page + * fault handler. This is implied on a non-faulting VM as there is no + * fault handler to defer to. + * - %DRM_XE_VM_BIND_FLAG_NULL - When the NULL flag is set, the page + * tables are setup with a special bit which indicates writes are + * dropped and all reads return zero. In the future, the NULL flags + * will only be valid for DRM_XE_VM_BIND_OP_MAP operations, the BO + * handle MBZ, and the BO offset MBZ. This flag is intended to + * implement VK sparse bindings. + * - %DRM_XE_VM_BIND_FLAG_CHECK_PXP - If the object is encrypted via PXP, + * reject the binding if the encryption key is no longer valid. This + * flag has no effect on BOs that are not marked as using PXP. + * - %DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR - When the CPU address mirror flag is + * set, no mappings are created rather the range is reserved for CPU address + * mirroring which will be populated on GPU page faults or prefetches. Only + * valid on VMs with DRM_XE_VM_CREATE_FLAG_FAULT_MODE set. The CPU address + * mirror flag are only valid for DRM_XE_VM_BIND_OP_MAP operations, the BO + * handle MBZ, and the BO offset MBZ. + */ +struct drm_xe_vm_bind_op { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** + * @obj: GEM object to operate on, MBZ for MAP_USERPTR, MBZ for UNMAP + */ + __u32 obj; + + /** + * @pat_index: The platform defined @pat_index to use for this mapping. + * The index basically maps to some predefined memory attributes, + * including things like caching, coherency, compression etc. The exact + * meaning of the pat_index is platform specific and defined in the + * Bspec and PRMs. When the KMD sets up the binding the index here is + * encoded into the ppGTT PTE. + * + * For coherency the @pat_index needs to be at least 1way coherent when + * drm_xe_gem_create.cpu_caching is DRM_XE_GEM_CPU_CACHING_WB. The KMD + * will extract the coherency mode from the @pat_index and reject if + * there is a mismatch (see note below for pre-MTL platforms). + * + * Note: On pre-MTL platforms there is only a caching mode and no + * explicit coherency mode, but on such hardware there is always a + * shared-LLC (or is dgpu) so all GT memory accesses are coherent with + * CPU caches even with the caching mode set as uncached. It's only the + * display engine that is incoherent (on dgpu it must be in VRAM which + * is always mapped as WC on the CPU). However to keep the uapi somewhat + * consistent with newer platforms the KMD groups the different cache + * levels into the following coherency buckets on all pre-MTL platforms: + * + * ppGTT UC -> COH_NONE + * ppGTT WC -> COH_NONE + * ppGTT WT -> COH_NONE + * ppGTT WB -> COH_AT_LEAST_1WAY + * + * In practice UC/WC/WT should only ever used for scanout surfaces on + * such platforms (or perhaps in general for dma-buf if shared with + * another device) since it is only the display engine that is actually + * incoherent. Everything else should typically use WB given that we + * have a shared-LLC. On MTL+ this completely changes and the HW + * defines the coherency mode as part of the @pat_index, where + * incoherent GT access is possible. + * + * Note: For userptr and externally imported dma-buf the kernel expects + * either 1WAY or 2WAY for the @pat_index. + * + * For DRM_XE_VM_BIND_FLAG_NULL bindings there are no KMD restrictions + * on the @pat_index. For such mappings there is no actual memory being + * mapped (the address in the PTE is invalid), so the various PAT memory + * attributes likely do not apply. Simply leaving as zero is one + * option (still a valid pat_index). Same applies to + * DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR bindings as for such mapping + * there is no actual memory being mapped. + */ + __u16 pat_index; + + /** @pad: MBZ */ + __u16 pad; + + union { + /** + * @obj_offset: Offset into the object, MBZ for CLEAR_RANGE, + * ignored for unbind + */ + __u64 obj_offset; + + /** @userptr: user pointer to bind on */ + __u64 userptr; + + /** + * @cpu_addr_mirror_offset: Offset from GPU @addr to create + * CPU address mirror mappings. MBZ with current level of + * support (e.g. 1 to 1 mapping between GPU and CPU mappings + * only supported). + */ + __s64 cpu_addr_mirror_offset; + }; + + /** + * @range: Number of bytes from the object to bind to addr, MBZ for + * UNMAP_ALL + */ + __u64 range; + + /** @addr: Address to operate on, MBZ for UNMAP_ALL */ + __u64 addr; + +#define DRM_XE_VM_BIND_OP_MAP 0x0 +#define DRM_XE_VM_BIND_OP_UNMAP 0x1 +#define DRM_XE_VM_BIND_OP_MAP_USERPTR 0x2 +#define DRM_XE_VM_BIND_OP_UNMAP_ALL 0x3 +#define DRM_XE_VM_BIND_OP_PREFETCH 0x4 + /** @op: Bind operation to perform */ + __u32 op; + +#define DRM_XE_VM_BIND_FLAG_READONLY (1 << 0) +#define DRM_XE_VM_BIND_FLAG_IMMEDIATE (1 << 1) +#define DRM_XE_VM_BIND_FLAG_NULL (1 << 2) +#define DRM_XE_VM_BIND_FLAG_DUMPABLE (1 << 3) +#define DRM_XE_VM_BIND_FLAG_CHECK_PXP (1 << 4) +#define DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR (1 << 5) + /** @flags: Bind flags */ + __u32 flags; + + /** + * @prefetch_mem_region_instance: Memory region to prefetch VMA to. + * It is a region instance, not a mask. + * To be used only with %DRM_XE_VM_BIND_OP_PREFETCH operation. + */ + __u32 prefetch_mem_region_instance; + + /** @pad2: MBZ */ + __u32 pad2; + + /** @reserved: Reserved */ + __u64 reserved[3]; +}; + +/** + * struct drm_xe_vm_bind - Input of &DRM_IOCTL_XE_VM_BIND + * + * Below is an example of a minimal use of @drm_xe_vm_bind to + * asynchronously bind the buffer `data` at address `BIND_ADDRESS` to + * illustrate `userptr`. It can be synchronized by using the example + * provided for @drm_xe_sync. + * + * .. code-block:: C + * + * data = aligned_alloc(ALIGNMENT, BO_SIZE); + * struct drm_xe_vm_bind bind = { + * .vm_id = vm, + * .num_binds = 1, + * .bind.obj = 0, + * .bind.obj_offset = to_user_pointer(data), + * .bind.range = BO_SIZE, + * .bind.addr = BIND_ADDRESS, + * .bind.op = DRM_XE_VM_BIND_OP_MAP_USERPTR, + * .bind.flags = 0, + * .num_syncs = 1, + * .syncs = &sync, + * .exec_queue_id = 0, + * }; + * ioctl(fd, DRM_IOCTL_XE_VM_BIND, &bind); + * + */ +struct drm_xe_vm_bind { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** @vm_id: The ID of the VM to bind to */ + __u32 vm_id; + + /** + * @exec_queue_id: exec_queue_id, must be of class + * DRM_XE_ENGINE_CLASS_VM_BIND and exec queue must have same vm_id. If + * zero, the default VM bind engine is used. + */ + __u32 exec_queue_id; + + /** @pad: MBZ */ + __u32 pad; + + /** @num_binds: number of binds in this IOCTL */ + __u32 num_binds; + + union { + /** @bind: used if num_binds == 1 */ + struct drm_xe_vm_bind_op bind; + + /** + * @vector_of_binds: userptr to array of struct + * drm_xe_vm_bind_op if num_binds > 1 + */ + __u64 vector_of_binds; + }; + + /** @pad2: MBZ */ + __u32 pad2; + + /** @num_syncs: amount of syncs to wait on */ + __u32 num_syncs; + + /** @syncs: pointer to struct drm_xe_sync array */ + __u64 syncs; + + /** @reserved: Reserved */ + __u64 reserved[2]; +}; + +/** + * struct drm_xe_exec_queue_create - Input of &DRM_IOCTL_XE_EXEC_QUEUE_CREATE + * + * This ioctl supports setting the following properties via the + * %DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY extension, which uses the + * generic @drm_xe_ext_set_property struct: + * + * - %DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY - set the queue priority. + * CAP_SYS_NICE is required to set a value above normal. + * - %DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE - set the queue timeslice + * duration in microseconds. + * - %DRM_XE_EXEC_QUEUE_SET_PROPERTY_PXP_TYPE - set the type of PXP session + * this queue will be used with. Valid values are listed in enum + * drm_xe_pxp_session_type. %DRM_XE_PXP_TYPE_NONE is the default behavior, so + * there is no need to explicitly set that. When a queue of type + * %DRM_XE_PXP_TYPE_HWDRM is created, the PXP default HWDRM session + * (%XE_PXP_HWDRM_DEFAULT_SESSION) will be started, if isn't already running. + * The user is expected to query the PXP status via the query ioctl (see + * %DRM_XE_DEVICE_QUERY_PXP_STATUS) and to wait for PXP to be ready before + * attempting to create a queue with this property. When a queue is created + * before PXP is ready, the ioctl will return -EBUSY if init is still in + * progress or -EIO if init failed. + * Given that going into a power-saving state kills PXP HWDRM sessions, + * runtime PM will be blocked while queues of this type are alive. + * All PXP queues will be killed if a PXP invalidation event occurs. + * + * The example below shows how to use @drm_xe_exec_queue_create to create + * a simple exec_queue (no parallel submission) of class + * &DRM_XE_ENGINE_CLASS_RENDER. + * + * .. code-block:: C + * + * struct drm_xe_engine_class_instance instance = { + * .engine_class = DRM_XE_ENGINE_CLASS_RENDER, + * }; + * struct drm_xe_exec_queue_create exec_queue_create = { + * .extensions = 0, + * .vm_id = vm, + * .num_bb_per_exec = 1, + * .num_eng_per_bb = 1, + * .instances = to_user_pointer(&instance), + * }; + * ioctl(fd, DRM_IOCTL_XE_EXEC_QUEUE_CREATE, &exec_queue_create); + * + * Allow users to provide a hint to kernel for cases demanding low latency + * profile. Please note it will have impact on power consumption. User can + * indicate low latency hint with flag while creating exec queue as + * mentioned below, + * + * struct drm_xe_exec_queue_create exec_queue_create = { + * .flags = DRM_XE_EXEC_QUEUE_LOW_LATENCY_HINT, + * .extensions = 0, + * .vm_id = vm, + * .num_bb_per_exec = 1, + * .num_eng_per_bb = 1, + * .instances = to_user_pointer(&instance), + * }; + * ioctl(fd, DRM_IOCTL_XE_EXEC_QUEUE_CREATE, &exec_queue_create); + * + */ +struct drm_xe_exec_queue_create { +#define DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY 0 +#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY 0 +#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE 1 +#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_PXP_TYPE 2 + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** @width: submission width (number BB per exec) for this exec queue */ + __u16 width; + + /** @num_placements: number of valid placements for this exec queue */ + __u16 num_placements; + + /** @vm_id: VM to use for this exec queue */ + __u32 vm_id; + +#define DRM_XE_EXEC_QUEUE_LOW_LATENCY_HINT (1 << 0) + /** @flags: flags to use for this exec queue */ + __u32 flags; + + /** @exec_queue_id: Returned exec queue ID */ + __u32 exec_queue_id; + + /** + * @instances: user pointer to a 2-d array of struct + * drm_xe_engine_class_instance + * + * length = width (i) * num_placements (j) + * index = j + i * width + */ + __u64 instances; + + /** @reserved: Reserved */ + __u64 reserved[2]; +}; + +/** + * struct drm_xe_exec_queue_destroy - Input of &DRM_IOCTL_XE_EXEC_QUEUE_DESTROY + */ +struct drm_xe_exec_queue_destroy { + /** @exec_queue_id: Exec queue ID */ + __u32 exec_queue_id; + + /** @pad: MBZ */ + __u32 pad; + + /** @reserved: Reserved */ + __u64 reserved[2]; +}; + +/** + * struct drm_xe_exec_queue_get_property - Input of + * &DRM_IOCTL_XE_EXEC_QUEUE_GET_PROPERTY + * + * The @property can be: + * - %DRM_XE_EXEC_QUEUE_GET_PROPERTY_BAN + */ +struct drm_xe_exec_queue_get_property { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** @exec_queue_id: Exec queue ID */ + __u32 exec_queue_id; + +#define DRM_XE_EXEC_QUEUE_GET_PROPERTY_BAN 0 + /** @property: property to get */ + __u32 property; + + /** @value: property value */ + __u64 value; + + /** @reserved: Reserved */ + __u64 reserved[2]; +}; + +/** + * struct drm_xe_sync - sync object + * + * The @type can be: + * - %DRM_XE_SYNC_TYPE_SYNCOBJ + * - %DRM_XE_SYNC_TYPE_TIMELINE_SYNCOBJ + * - %DRM_XE_SYNC_TYPE_USER_FENCE + * + * and the @flags can be: + * - %DRM_XE_SYNC_FLAG_SIGNAL + * + * A minimal use of @drm_xe_sync looks like this: + * + * .. code-block:: C + * + * struct drm_xe_sync sync = { + * .flags = DRM_XE_SYNC_FLAG_SIGNAL, + * .type = DRM_XE_SYNC_TYPE_SYNCOBJ, + * }; + * struct drm_syncobj_create syncobj_create = { 0 }; + * ioctl(fd, DRM_IOCTL_SYNCOBJ_CREATE, &syncobj_create); + * sync.handle = syncobj_create.handle; + * ... + * use of &sync in drm_xe_exec or drm_xe_vm_bind + * ... + * struct drm_syncobj_wait wait = { + * .handles = &sync.handle, + * .timeout_nsec = INT64_MAX, + * .count_handles = 1, + * .flags = 0, + * .first_signaled = 0, + * .pad = 0, + * }; + * ioctl(fd, DRM_IOCTL_SYNCOBJ_WAIT, &wait); + */ +struct drm_xe_sync { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + +#define DRM_XE_SYNC_TYPE_SYNCOBJ 0x0 +#define DRM_XE_SYNC_TYPE_TIMELINE_SYNCOBJ 0x1 +#define DRM_XE_SYNC_TYPE_USER_FENCE 0x2 + /** @type: Type of the this sync object */ + __u32 type; + +#define DRM_XE_SYNC_FLAG_SIGNAL (1 << 0) + /** @flags: Sync Flags */ + __u32 flags; + + union { + /** @handle: Handle for the object */ + __u32 handle; + + /** + * @addr: Address of user fence. When sync is passed in via exec + * IOCTL this is a GPU address in the VM. When sync passed in + * via VM bind IOCTL this is a user pointer. In either case, it + * is the users responsibility that this address is present and + * mapped when the user fence is signalled. Must be qword + * aligned. + */ + __u64 addr; + }; + + /** + * @timeline_value: Input for the timeline sync object. Needs to be + * different than 0 when used with %DRM_XE_SYNC_TYPE_TIMELINE_SYNCOBJ. + */ + __u64 timeline_value; + + /** @reserved: Reserved */ + __u64 reserved[2]; +}; + +/** + * struct drm_xe_exec - Input of &DRM_IOCTL_XE_EXEC + * + * This is an example to use @drm_xe_exec for execution of the object + * at BIND_ADDRESS (see example in @drm_xe_vm_bind) by an exec_queue + * (see example in @drm_xe_exec_queue_create). It can be synchronized + * by using the example provided for @drm_xe_sync. + * + * .. code-block:: C + * + * struct drm_xe_exec exec = { + * .exec_queue_id = exec_queue, + * .syncs = &sync, + * .num_syncs = 1, + * .address = BIND_ADDRESS, + * .num_batch_buffer = 1, + * }; + * ioctl(fd, DRM_IOCTL_XE_EXEC, &exec); + * + */ +struct drm_xe_exec { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** @exec_queue_id: Exec queue ID for the batch buffer */ + __u32 exec_queue_id; + + /** @num_syncs: Amount of struct drm_xe_sync in array. */ + __u32 num_syncs; + + /** @syncs: Pointer to struct drm_xe_sync array. */ + __u64 syncs; + + /** + * @address: address of batch buffer if num_batch_buffer == 1 or an + * array of batch buffer addresses + */ + __u64 address; + + /** + * @num_batch_buffer: number of batch buffer in this exec, must match + * the width of the engine + */ + __u16 num_batch_buffer; + + /** @pad: MBZ */ + __u16 pad[3]; + + /** @reserved: Reserved */ + __u64 reserved[2]; +}; + +/** + * struct drm_xe_wait_user_fence - Input of &DRM_IOCTL_XE_WAIT_USER_FENCE + * + * Wait on user fence, XE will wake-up on every HW engine interrupt in the + * instances list and check if user fence is complete:: + * + * (*addr & MASK) OP (VALUE & MASK) + * + * Returns to user on user fence completion or timeout. + * + * The @op can be: + * - %DRM_XE_UFENCE_WAIT_OP_EQ + * - %DRM_XE_UFENCE_WAIT_OP_NEQ + * - %DRM_XE_UFENCE_WAIT_OP_GT + * - %DRM_XE_UFENCE_WAIT_OP_GTE + * - %DRM_XE_UFENCE_WAIT_OP_LT + * - %DRM_XE_UFENCE_WAIT_OP_LTE + * + * and the @flags can be: + * - %DRM_XE_UFENCE_WAIT_FLAG_ABSTIME + * - %DRM_XE_UFENCE_WAIT_FLAG_SOFT_OP + * + * The @mask values can be for example: + * - 0xffu for u8 + * - 0xffffu for u16 + * - 0xffffffffu for u32 + * - 0xffffffffffffffffu for u64 + */ +struct drm_xe_wait_user_fence { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** + * @addr: user pointer address to wait on, must qword aligned + */ + __u64 addr; + +#define DRM_XE_UFENCE_WAIT_OP_EQ 0x0 +#define DRM_XE_UFENCE_WAIT_OP_NEQ 0x1 +#define DRM_XE_UFENCE_WAIT_OP_GT 0x2 +#define DRM_XE_UFENCE_WAIT_OP_GTE 0x3 +#define DRM_XE_UFENCE_WAIT_OP_LT 0x4 +#define DRM_XE_UFENCE_WAIT_OP_LTE 0x5 + /** @op: wait operation (type of comparison) */ + __u16 op; + +#define DRM_XE_UFENCE_WAIT_FLAG_ABSTIME (1 << 0) + /** @flags: wait flags */ + __u16 flags; + + /** @pad: MBZ */ + __u32 pad; + + /** @value: compare value */ + __u64 value; + + /** @mask: comparison mask */ + __u64 mask; + + /** + * @timeout: how long to wait before bailing, value in nanoseconds. + * Without DRM_XE_UFENCE_WAIT_FLAG_ABSTIME flag set (relative timeout) + * it contains timeout expressed in nanoseconds to wait (fence will + * expire at now() + timeout). + * When DRM_XE_UFENCE_WAIT_FLAG_ABSTIME flat is set (absolute timeout) + * wait will end at timeout (uses system MONOTONIC_CLOCK). Passing + * negative timeout leads to neverending wait. + * + * On relative timeout this value is updated with timeout left + * (for restarting the call in case of signal delivery). + * On absolute timeout this value stays intact (restarted call still + * expire at the same point of time). + */ + __s64 timeout; + + /** @exec_queue_id: exec_queue_id returned from + * xe_exec_queue_create_ioctl */ + __u32 exec_queue_id; + + /** @pad2: MBZ */ + __u32 pad2; + + /** @reserved: Reserved */ + __u64 reserved[2]; +}; + +/** + * enum drm_xe_observation_type - Observation stream types + */ +enum drm_xe_observation_type { + /** @DRM_XE_OBSERVATION_TYPE_OA: OA observation stream type */ + DRM_XE_OBSERVATION_TYPE_OA, + /** @DRM_XE_OBSERVATION_TYPE_EU_STALL: EU stall sampling observation + stream type */ + DRM_XE_OBSERVATION_TYPE_EU_STALL, +}; + +/** + * enum drm_xe_observation_op - Observation stream ops + */ +enum drm_xe_observation_op { + /** @DRM_XE_OBSERVATION_OP_STREAM_OPEN: Open an observation stream */ + DRM_XE_OBSERVATION_OP_STREAM_OPEN, + + /** @DRM_XE_OBSERVATION_OP_ADD_CONFIG: Add observation stream config */ + DRM_XE_OBSERVATION_OP_ADD_CONFIG, + + /** @DRM_XE_OBSERVATION_OP_REMOVE_CONFIG: Remove observation stream + config */ + DRM_XE_OBSERVATION_OP_REMOVE_CONFIG, +}; + +/** + * struct drm_xe_observation_param - Input of &DRM_XE_OBSERVATION + * + * The observation layer enables multiplexing observation streams of + * multiple types. The actual params for a particular stream operation are + * supplied via the @param pointer (use __copy_from_user to get these + * params). + */ +struct drm_xe_observation_param { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + /** @observation_type: observation stream type, of enum + * @drm_xe_observation_type */ + __u64 observation_type; + /** @observation_op: observation stream op, of enum + * @drm_xe_observation_op */ + __u64 observation_op; + /** @param: Pointer to actual stream params */ + __u64 param; +}; + +/** + * enum drm_xe_observation_ioctls - Observation stream fd ioctl's + * + * Information exchanged between userspace and kernel for observation fd + * ioctl's is stream type specific + */ +enum drm_xe_observation_ioctls { + /** @DRM_XE_OBSERVATION_IOCTL_ENABLE: Enable data capture for an + observation stream */ + DRM_XE_OBSERVATION_IOCTL_ENABLE = _IO('i', 0x0), + + /** @DRM_XE_OBSERVATION_IOCTL_DISABLE: Disable data capture for a + observation stream */ + DRM_XE_OBSERVATION_IOCTL_DISABLE = _IO('i', 0x1), + + /** @DRM_XE_OBSERVATION_IOCTL_CONFIG: Change observation stream + configuration */ + DRM_XE_OBSERVATION_IOCTL_CONFIG = _IO('i', 0x2), + + /** @DRM_XE_OBSERVATION_IOCTL_STATUS: Return observation stream status + */ + DRM_XE_OBSERVATION_IOCTL_STATUS = _IO('i', 0x3), + + /** @DRM_XE_OBSERVATION_IOCTL_INFO: Return observation stream info */ + DRM_XE_OBSERVATION_IOCTL_INFO = _IO('i', 0x4), +}; + +/** + * enum drm_xe_oa_unit_type - OA unit types + */ +enum drm_xe_oa_unit_type { + /** + * @DRM_XE_OA_UNIT_TYPE_OAG: OAG OA unit. OAR/OAC are considered + * sub-types of OAG. For OAR/OAC, use OAG. + */ + DRM_XE_OA_UNIT_TYPE_OAG, + + /** @DRM_XE_OA_UNIT_TYPE_OAM: OAM OA unit */ + DRM_XE_OA_UNIT_TYPE_OAM, + + /** @DRM_XE_OA_UNIT_TYPE_OAM_SAG: OAM_SAG OA unit */ + DRM_XE_OA_UNIT_TYPE_OAM_SAG, +}; + +/** + * struct drm_xe_oa_unit - describe OA unit + */ +struct drm_xe_oa_unit { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** @oa_unit_id: OA unit ID */ + __u32 oa_unit_id; + + /** @oa_unit_type: OA unit type of @drm_xe_oa_unit_type */ + __u32 oa_unit_type; + + /** @capabilities: OA capabilities bit-mask */ + __u64 capabilities; +#define DRM_XE_OA_CAPS_BASE (1 << 0) +#define DRM_XE_OA_CAPS_SYNCS (1 << 1) +#define DRM_XE_OA_CAPS_OA_BUFFER_SIZE (1 << 2) +#define DRM_XE_OA_CAPS_WAIT_NUM_REPORTS (1 << 3) +#define DRM_XE_OA_CAPS_OAM (1 << 4) + + /** @oa_timestamp_freq: OA timestamp freq */ + __u64 oa_timestamp_freq; + + /** @reserved: MBZ */ + __u64 reserved[4]; + + /** @num_engines: number of engines in @eci array */ + __u64 num_engines; + + /** @eci: engines attached to this OA unit */ + struct drm_xe_engine_class_instance eci[]; +}; + +/** + * struct drm_xe_query_oa_units - describe OA units + * + * If a query is made with a struct drm_xe_device_query where .query + * is equal to DRM_XE_DEVICE_QUERY_OA_UNITS, then the reply uses struct + * drm_xe_query_oa_units in .data. + * + * OA unit properties for all OA units can be accessed using a code block + * such as the one below: + * + * .. code-block:: C + * + * struct drm_xe_query_oa_units *qoa; + * struct drm_xe_oa_unit *oau; + * u8 *poau; + * + * // malloc qoa and issue DRM_XE_DEVICE_QUERY_OA_UNITS. Then: + * poau = (u8 *)&qoa->oa_units[0]; + * for (int i = 0; i < qoa->num_oa_units; i++) { + * oau = (struct drm_xe_oa_unit *)poau; + * // Access 'struct drm_xe_oa_unit' fields here + * poau += sizeof(*oau) + oau->num_engines * sizeof(oau->eci[0]); + * } + */ +struct drm_xe_query_oa_units { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + /** @num_oa_units: number of OA units returned in oau[] */ + __u32 num_oa_units; + /** @pad: MBZ */ + __u32 pad; + /** + * @oa_units: struct @drm_xe_oa_unit array returned for this device. + * Written below as a u64 array to avoid problems with nested flexible + * arrays with some compilers + */ + __u64 oa_units[]; +}; + +/** + * enum drm_xe_oa_format_type - OA format types as specified in PRM/Bspec + * 52198/60942 + */ +enum drm_xe_oa_format_type { + /** @DRM_XE_OA_FMT_TYPE_OAG: OAG report format */ + DRM_XE_OA_FMT_TYPE_OAG, + /** @DRM_XE_OA_FMT_TYPE_OAR: OAR report format */ + DRM_XE_OA_FMT_TYPE_OAR, + /** @DRM_XE_OA_FMT_TYPE_OAM: OAM report format */ + DRM_XE_OA_FMT_TYPE_OAM, + /** @DRM_XE_OA_FMT_TYPE_OAC: OAC report format */ + DRM_XE_OA_FMT_TYPE_OAC, + /** @DRM_XE_OA_FMT_TYPE_OAM_MPEC: OAM SAMEDIA or OAM MPEC report format + */ + DRM_XE_OA_FMT_TYPE_OAM_MPEC, + /** @DRM_XE_OA_FMT_TYPE_PEC: PEC report format */ + DRM_XE_OA_FMT_TYPE_PEC, +}; + +/** + * enum drm_xe_oa_property_id - OA stream property id's + * + * Stream params are specified as a chain of @drm_xe_ext_set_property + * struct's, with @property values from enum @drm_xe_oa_property_id and + * @drm_xe_user_extension base.name set to @DRM_XE_OA_EXTENSION_SET_PROPERTY. + * @param field in struct @drm_xe_observation_param points to the first + * @drm_xe_ext_set_property struct. + * + * Exactly the same mechanism is also used for stream reconfiguration using the + * @DRM_XE_OBSERVATION_IOCTL_CONFIG observation stream fd ioctl, though only a + * subset of properties below can be specified for stream reconfiguration. + */ +enum drm_xe_oa_property_id { +#define DRM_XE_OA_EXTENSION_SET_PROPERTY 0 + /** + * @DRM_XE_OA_PROPERTY_OA_UNIT_ID: ID of the OA unit on which to open + * the OA stream, see @oa_unit_id in 'struct + * drm_xe_query_oa_units'. Defaults to 0 if not provided. + */ + DRM_XE_OA_PROPERTY_OA_UNIT_ID = 1, + + /** + * @DRM_XE_OA_PROPERTY_SAMPLE_OA: A value of 1 requests inclusion of raw + * OA unit reports or stream samples in a global buffer attached to an + * OA unit. + */ + DRM_XE_OA_PROPERTY_SAMPLE_OA, + + /** + * @DRM_XE_OA_PROPERTY_OA_METRIC_SET: OA metrics defining contents of OA + * reports, previously added via @DRM_XE_OBSERVATION_OP_ADD_CONFIG. + */ + DRM_XE_OA_PROPERTY_OA_METRIC_SET, + + /** @DRM_XE_OA_PROPERTY_OA_FORMAT: OA counter report format */ + DRM_XE_OA_PROPERTY_OA_FORMAT, +/* + * OA_FORMAT's are specified the same way as in PRM/Bspec 52198/60942, + * in terms of the following quantities: a. enum @drm_xe_oa_format_type + * b. Counter select c. Counter size and d. BC report. Also refer to the + * oa_formats array in drivers/gpu/drm/xe/xe_oa.c. + */ +#define DRM_XE_OA_FORMAT_MASK_FMT_TYPE (0xffu << 0) +#define DRM_XE_OA_FORMAT_MASK_COUNTER_SEL (0xffu << 8) +#define DRM_XE_OA_FORMAT_MASK_COUNTER_SIZE (0xffu << 16) +#define DRM_XE_OA_FORMAT_MASK_BC_REPORT (0xffu << 24) + + /** + * @DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT: Requests periodic OA unit + * sampling with sampling frequency proportional to 2^(period_exponent + + * 1) + */ + DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT, + + /** + * @DRM_XE_OA_PROPERTY_OA_DISABLED: A value of 1 will open the OA + * stream in a DISABLED state (see @DRM_XE_OBSERVATION_IOCTL_ENABLE). + */ + DRM_XE_OA_PROPERTY_OA_DISABLED, + + /** + * @DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID: Open the stream for a specific + * @exec_queue_id. OA queries can be executed on this exec queue. + */ + DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID, + + /** + * @DRM_XE_OA_PROPERTY_OA_ENGINE_INSTANCE: Optional engine instance to + * pass along with @DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID or will default to + * 0. + */ + DRM_XE_OA_PROPERTY_OA_ENGINE_INSTANCE, + + /** + * @DRM_XE_OA_PROPERTY_NO_PREEMPT: Allow preemption and timeslicing + * to be disabled for the stream exec queue. + */ + DRM_XE_OA_PROPERTY_NO_PREEMPT, + + /** + * @DRM_XE_OA_PROPERTY_NUM_SYNCS: Number of syncs in the sync array + * specified in @DRM_XE_OA_PROPERTY_SYNCS + */ + DRM_XE_OA_PROPERTY_NUM_SYNCS, + + /** + * @DRM_XE_OA_PROPERTY_SYNCS: Pointer to struct @drm_xe_sync array + * with array size specified via @DRM_XE_OA_PROPERTY_NUM_SYNCS. OA + * configuration will wait till input fences signal. Output fences + * will signal after the new OA configuration takes effect. For + * @DRM_XE_SYNC_TYPE_USER_FENCE, @addr is a user pointer, similar + * to the VM bind case. + */ + DRM_XE_OA_PROPERTY_SYNCS, + + /** + * @DRM_XE_OA_PROPERTY_OA_BUFFER_SIZE: Size of OA buffer to be + * allocated by the driver in bytes. Supported sizes are powers of + * 2 from 128 KiB to 128 MiB. When not specified, a 16 MiB OA + * buffer is allocated by default. + */ + DRM_XE_OA_PROPERTY_OA_BUFFER_SIZE, + + /** + * @DRM_XE_OA_PROPERTY_WAIT_NUM_REPORTS: Number of reports to wait + * for before unblocking poll or read + */ + DRM_XE_OA_PROPERTY_WAIT_NUM_REPORTS, +}; + +/** + * struct drm_xe_oa_config - OA metric configuration + * + * Multiple OA configs can be added using @DRM_XE_OBSERVATION_OP_ADD_CONFIG. A + * particular config can be specified when opening an OA stream using + * @DRM_XE_OA_PROPERTY_OA_METRIC_SET property. + */ +struct drm_xe_oa_config { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** @uuid: String formatted like "%\08x-%\04x-%\04x-%\04x-%\012x" */ + char uuid[36]; + + /** @n_regs: Number of regs in @regs_ptr */ + __u32 n_regs; + + /** + * @regs_ptr: Pointer to (register address, value) pairs for OA config + * registers. Expected length of buffer is: (2 * sizeof(u32) * @n_regs). + */ + __u64 regs_ptr; +}; + +/** + * struct drm_xe_oa_stream_status - OA stream status returned from + * @DRM_XE_OBSERVATION_IOCTL_STATUS observation stream fd ioctl. Userspace can + * call the ioctl to query stream status in response to EIO errno from + * observation fd read(). + */ +struct drm_xe_oa_stream_status { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** @oa_status: OA stream status (see Bspec 46717/61226) */ + __u64 oa_status; +#define DRM_XE_OASTATUS_MMIO_TRG_Q_FULL (1 << 3) +#define DRM_XE_OASTATUS_COUNTER_OVERFLOW (1 << 2) +#define DRM_XE_OASTATUS_BUFFER_OVERFLOW (1 << 1) +#define DRM_XE_OASTATUS_REPORT_LOST (1 << 0) + + /** @reserved: reserved for future use */ + __u64 reserved[3]; +}; + +/** + * struct drm_xe_oa_stream_info - OA stream info returned from + * @DRM_XE_OBSERVATION_IOCTL_INFO observation stream fd ioctl + */ +struct drm_xe_oa_stream_info { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** @oa_buf_size: OA buffer size */ + __u64 oa_buf_size; + + /** @reserved: reserved for future use */ + __u64 reserved[3]; +}; + +/** + * enum drm_xe_pxp_session_type - Supported PXP session types. + * + * We currently only support HWDRM sessions, which are used for protected + * content that ends up being displayed, but the HW supports multiple types, so + * we might extend support in the future. + */ +enum drm_xe_pxp_session_type { + /** @DRM_XE_PXP_TYPE_NONE: PXP not used */ + DRM_XE_PXP_TYPE_NONE = 0, + /** + * @DRM_XE_PXP_TYPE_HWDRM: HWDRM sessions are used for content that ends + * up on the display. + */ + DRM_XE_PXP_TYPE_HWDRM = 1, +}; + +/* ID of the protected content session managed by Xe when PXP is active */ +#define DRM_XE_PXP_HWDRM_DEFAULT_SESSION 0xf + +/** + * enum drm_xe_eu_stall_property_id - EU stall sampling input property ids. + * + * These properties are passed to the driver at open as a chain of + * @drm_xe_ext_set_property structures with @property set to these + * properties' enums and @value set to the corresponding values of these + * properties. @drm_xe_user_extension base.name should be set to + * @DRM_XE_EU_STALL_EXTENSION_SET_PROPERTY. + * + * With the file descriptor obtained from open, user space must enable + * the EU stall stream fd with @DRM_XE_OBSERVATION_IOCTL_ENABLE before + * calling read(). EIO errno from read() indicates HW dropped data + * due to full buffer. + */ +enum drm_xe_eu_stall_property_id { +#define DRM_XE_EU_STALL_EXTENSION_SET_PROPERTY 0 + /** + * @DRM_XE_EU_STALL_PROP_GT_ID: @gt_id of the GT on which + * EU stall data will be captured. + */ + DRM_XE_EU_STALL_PROP_GT_ID = 1, + + /** + * @DRM_XE_EU_STALL_PROP_SAMPLE_RATE: Sampling rate in + * GPU cycles from @sampling_rates in struct @drm_xe_query_eu_stall + */ + DRM_XE_EU_STALL_PROP_SAMPLE_RATE, + + /** + * @DRM_XE_EU_STALL_PROP_WAIT_NUM_REPORTS: Minimum number of + * EU stall data reports to be present in the kernel buffer + * before unblocking a blocked poll or read. + */ + DRM_XE_EU_STALL_PROP_WAIT_NUM_REPORTS, +}; + +/** + * struct drm_xe_query_eu_stall - Information about EU stall sampling. + * + * If a query is made with a struct @drm_xe_device_query where .query + * is equal to @DRM_XE_DEVICE_QUERY_EU_STALL, then the reply uses + * struct @drm_xe_query_eu_stall in .data. + */ +struct drm_xe_query_eu_stall { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** @capabilities: EU stall capabilities bit-mask */ + __u64 capabilities; +#define DRM_XE_EU_STALL_CAPS_BASE (1 << 0) + + /** @record_size: size of each EU stall data record */ + __u64 record_size; + + /** @per_xecore_buf_size: internal per XeCore buffer size */ + __u64 per_xecore_buf_size; + + /** @reserved: Reserved */ + __u64 reserved[5]; + + /** @num_sampling_rates: Number of sampling rates in @sampling_rates + * array */ + __u64 num_sampling_rates; + + /** + * @sampling_rates: Flexible array of sampling rates + * sorted in the fastest to slowest order. + * Sampling rates are specified in GPU clock cycles. + */ + __u64 sampling_rates[]; +}; + +#if defined(__cplusplus) +} +#endif + +#endif /* _XE_DRM_H_ */ |
