[PATCH 5.14 090/162] drm/amdkfd: map SVM range with correct access permission

From: Greg Kroah-Hartman
Date: Mon Sep 27 2021 - 13:25:51 EST


From: Philip Yang <Philip.Yang@xxxxxxx>

[ Upstream commit 2f617f4df8dfef68f175160d533f5820a368023e ]

Restore retry fault or prefetch range, or restore svm range after
eviction to map range to GPU with correct read or write access
permission.

Range may includes multiple VMAs, update GPU page table with offset of
prange, number of pages for each VMA according VMA access permission.

Signed-off-by: Philip Yang <Philip.Yang@xxxxxxx>
Reviewed-by: Felix Kuehling <Felix.Kuehling@xxxxxxx>
Signed-off-by: Alex Deucher <alexander.deucher@xxxxxxx>
Signed-off-by: Sasha Levin <sashal@xxxxxxxxxx>
---
drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 134 +++++++++++++++++----------
1 file changed, 86 insertions(+), 48 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 0f7f1e5621ea..ddac10b5bd3a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -120,6 +120,7 @@ static void svm_range_remove_notifier(struct svm_range *prange)

static int
svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange,
+ unsigned long offset, unsigned long npages,
unsigned long *hmm_pfns, uint32_t gpuidx)
{
enum dma_data_direction dir = DMA_BIDIRECTIONAL;
@@ -136,7 +137,8 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange,
prange->dma_addr[gpuidx] = addr;
}

- for (i = 0; i < prange->npages; i++) {
+ addr += offset;
+ for (i = 0; i < npages; i++) {
if (WARN_ONCE(addr[i] && !dma_mapping_error(dev, addr[i]),
"leaking dma mapping\n"))
dma_unmap_page(dev, addr[i], PAGE_SIZE, dir);
@@ -167,6 +169,7 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange,

static int
svm_range_dma_map(struct svm_range *prange, unsigned long *bitmap,
+ unsigned long offset, unsigned long npages,
unsigned long *hmm_pfns)
{
struct kfd_process *p;
@@ -187,7 +190,8 @@ svm_range_dma_map(struct svm_range *prange, unsigned long *bitmap,
}
adev = (struct amdgpu_device *)pdd->dev->kgd;

- r = svm_range_dma_map_dev(adev, prange, hmm_pfns, gpuidx);
+ r = svm_range_dma_map_dev(adev, prange, offset, npages,
+ hmm_pfns, gpuidx);
if (r)
break;
}
@@ -1088,11 +1092,6 @@ svm_range_get_pte_flags(struct amdgpu_device *adev, struct svm_range *prange,
pte_flags |= snoop ? AMDGPU_PTE_SNOOPED : 0;

pte_flags |= amdgpu_gem_va_map_flags(adev, mapping_flags);
-
- pr_debug("svms 0x%p [0x%lx 0x%lx] vram %d PTE 0x%llx mapping 0x%x\n",
- prange->svms, prange->start, prange->last,
- (domain == SVM_RANGE_VRAM_DOMAIN) ? 1:0, pte_flags, mapping_flags);
-
return pte_flags;
}

@@ -1156,7 +1155,8 @@ svm_range_unmap_from_gpus(struct svm_range *prange, unsigned long start,

static int
svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
- struct svm_range *prange, dma_addr_t *dma_addr,
+ struct svm_range *prange, unsigned long offset,
+ unsigned long npages, bool readonly, dma_addr_t *dma_addr,
struct amdgpu_device *bo_adev, struct dma_fence **fence)
{
struct amdgpu_bo_va bo_va;
@@ -1167,14 +1167,15 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
int r = 0;
int64_t i;

- pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start,
- prange->last);
+ last_start = prange->start + offset;
+
+ pr_debug("svms 0x%p [0x%lx 0x%lx] readonly %d\n", prange->svms,
+ last_start, last_start + npages - 1, readonly);

if (prange->svm_bo && prange->ttm_res)
bo_va.is_xgmi = amdgpu_xgmi_same_hive(adev, bo_adev);

- last_start = prange->start;
- for (i = 0; i < prange->npages; i++) {
+ for (i = offset; i < offset + npages; i++) {
last_domain = dma_addr[i] & SVM_RANGE_VRAM_DOMAIN;
dma_addr[i] &= ~SVM_RANGE_VRAM_DOMAIN;
if ((prange->start + i) < prange->last &&
@@ -1183,13 +1184,21 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,

pr_debug("Mapping range [0x%lx 0x%llx] on domain: %s\n",
last_start, prange->start + i, last_domain ? "GPU" : "CPU");
+
pte_flags = svm_range_get_pte_flags(adev, prange, last_domain);
- r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false, NULL,
- last_start,
+ if (readonly)
+ pte_flags &= ~AMDGPU_PTE_WRITEABLE;
+
+ pr_debug("svms 0x%p map [0x%lx 0x%llx] vram %d PTE 0x%llx\n",
+ prange->svms, last_start, prange->start + i,
+ (last_domain == SVM_RANGE_VRAM_DOMAIN) ? 1 : 0,
+ pte_flags);
+
+ r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false,
+ NULL, last_start,
prange->start + i, pte_flags,
last_start - prange->start,
- NULL,
- dma_addr,
+ NULL, dma_addr,
&vm->last_update,
&table_freed);
if (r) {
@@ -1220,8 +1229,10 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
return r;
}

-static int svm_range_map_to_gpus(struct svm_range *prange,
- unsigned long *bitmap, bool wait)
+static int
+svm_range_map_to_gpus(struct svm_range *prange, unsigned long offset,
+ unsigned long npages, bool readonly,
+ unsigned long *bitmap, bool wait)
{
struct kfd_process_device *pdd;
struct amdgpu_device *bo_adev;
@@ -1257,7 +1268,8 @@ static int svm_range_map_to_gpus(struct svm_range *prange,
}

r = svm_range_map_to_gpu(adev, drm_priv_to_vm(pdd->drm_priv),
- prange, prange->dma_addr[gpuidx],
+ prange, offset, npages, readonly,
+ prange->dma_addr[gpuidx],
bo_adev, wait ? &fence : NULL);
if (r)
break;
@@ -1390,7 +1402,7 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
int32_t gpuidx, bool intr, bool wait)
{
struct svm_validate_context ctx;
- struct hmm_range *hmm_range;
+ unsigned long start, end, addr;
struct kfd_process *p;
void *owner;
int32_t idx;
@@ -1448,40 +1460,66 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
break;
}
}
- r = amdgpu_hmm_range_get_pages(&prange->notifier, mm, NULL,
- prange->start << PAGE_SHIFT,
- prange->npages, &hmm_range,
- false, true, owner);
- if (r) {
- pr_debug("failed %d to get svm range pages\n", r);
- goto unreserve_out;
- }

- r = svm_range_dma_map(prange, ctx.bitmap,
- hmm_range->hmm_pfns);
- if (r) {
- pr_debug("failed %d to dma map range\n", r);
- goto unreserve_out;
- }
+ start = prange->start << PAGE_SHIFT;
+ end = (prange->last + 1) << PAGE_SHIFT;
+ for (addr = start; addr < end && !r; ) {
+ struct hmm_range *hmm_range;
+ struct vm_area_struct *vma;
+ unsigned long next;
+ unsigned long offset;
+ unsigned long npages;
+ bool readonly;

- prange->validated_once = true;
+ vma = find_vma(mm, addr);
+ if (!vma || addr < vma->vm_start) {
+ r = -EFAULT;
+ goto unreserve_out;
+ }
+ readonly = !(vma->vm_flags & VM_WRITE);

- svm_range_lock(prange);
- if (amdgpu_hmm_range_get_pages_done(hmm_range)) {
- pr_debug("hmm update the range, need validate again\n");
- r = -EAGAIN;
- goto unlock_out;
- }
- if (!list_empty(&prange->child_list)) {
- pr_debug("range split by unmap in parallel, validate again\n");
- r = -EAGAIN;
- goto unlock_out;
- }
+ next = min(vma->vm_end, end);
+ npages = (next - addr) >> PAGE_SHIFT;
+ r = amdgpu_hmm_range_get_pages(&prange->notifier, mm, NULL,
+ addr, npages, &hmm_range,
+ readonly, true, owner);
+ if (r) {
+ pr_debug("failed %d to get svm range pages\n", r);
+ goto unreserve_out;
+ }

- r = svm_range_map_to_gpus(prange, ctx.bitmap, wait);
+ offset = (addr - start) >> PAGE_SHIFT;
+ r = svm_range_dma_map(prange, ctx.bitmap, offset, npages,
+ hmm_range->hmm_pfns);
+ if (r) {
+ pr_debug("failed %d to dma map range\n", r);
+ goto unreserve_out;
+ }
+
+ svm_range_lock(prange);
+ if (amdgpu_hmm_range_get_pages_done(hmm_range)) {
+ pr_debug("hmm update the range, need validate again\n");
+ r = -EAGAIN;
+ goto unlock_out;
+ }
+ if (!list_empty(&prange->child_list)) {
+ pr_debug("range split by unmap in parallel, validate again\n");
+ r = -EAGAIN;
+ goto unlock_out;
+ }
+
+ r = svm_range_map_to_gpus(prange, offset, npages, readonly,
+ ctx.bitmap, wait);

unlock_out:
- svm_range_unlock(prange);
+ svm_range_unlock(prange);
+
+ addr = next;
+ }
+
+ if (addr == end)
+ prange->validated_once = true;
+
unreserve_out:
svm_range_unreserve_bos(&ctx);

--
2.33.0