Re: [PATCH] fix memory leak on kvm_vm_ioctl_create_spapr_tce

From: Nixiaoming
Date: Wed Aug 23 2017 - 21:06:58 EST


>On 23.08.2017 08:06, Paul Mackerras wrote:
>> On Wed, Aug 23, 2017 at 01:43:08AM +0000, Nixiaoming wrote:
>>>> On 22.08.2017 17:15, David Hildenbrand wrote:
>>>>> On 22.08.2017 16:28, nixiaoming wrote:
>>>>>> miss kfree(stt) when anon_inode_getfd return fail so add check
>>>>>> anon_inode_getfd return val, and kfree stt
>>>>>>
>>>>>> Signed-off-by: nixiaoming <nixiaoming@xxxxxxxxxx>
>>>>>> ---
>>>>>> arch/powerpc/kvm/book3s_64_vio.c | 5 ++++-
>>>>>> 1 file changed, 4 insertions(+), 1 deletion(-)
>>>>>>
>>>>>> diff --git a/arch/powerpc/kvm/book3s_64_vio.c
>>>>>> b/arch/powerpc/kvm/book3s_64_vio.c
>>>>>> index a160c14..a0b4459 100644
>>>>>> --- a/arch/powerpc/kvm/book3s_64_vio.c
>>>>>> +++ b/arch/powerpc/kvm/book3s_64_vio.c
>>>>>> @@ -341,8 +341,11 @@ long kvm_vm_ioctl_create_spapr_tce(struct
>>>>>> kvm *kvm,
>>>>>>
>>>>>> mutex_unlock(&kvm->lock);
>>>>>>
>>>>>> - return anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops,
>>>>>> + ret = anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops,
>>>>>> stt, O_RDWR | O_CLOEXEC);
>>>>>> + if (ret < 0)
>>>>>> + goto fail;
>>>>>> + return ret;
>>>>>>
>>>>>> fail:
>>>>>> if (stt) {
>>>>>>
>>>>>
>>>>>
>>>>> stt has already been added to kvm->arch.spapr_tce_tables, so
>>>>> freeing it is evil IMHO. I don't know that code, so I don't know
>>>>> if there is some other place that will make sure that everything
>>>>> in
>>>>> kvm->arch.spapr_tce_tables will properly get freed, even when no
>>>>> kvm->release
>>>>> function has been called (kvm_spapr_tce_release).
>>>>>
>>>>
>>>> If it is really not freed, than also kvm_put_kvm(stt->kvm) is missing.
>>>>
>>>> --
>>>>
>>>> Thanks,
>>>>
>>>> David
>>>>
>>>
>>> if (!stt) return -ENOMEM;
>>> kvm_get_kvm(kvm);
>>> if anon_inode_getfd return -ENOMEM
>>> The user can not determine whether kvm_get_kvm has been called so
>>> need add kvm_pet_kvm when anon_inode_getfd fail
>>>
>>> stt has already been added to kvm->arch.spapr_tce_tables, but if
>>> anon_inode_getfd fail, stt is unused val, so call list_del_rcu, and
>>> free as quickly as possible
>>>
>>> new patch:
>>>
>>> ---
>>> arch/powerpc/kvm/book3s_64_vio.c | 10 +++++++++-
>>> 1 file changed, 9 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/arch/powerpc/kvm/book3s_64_vio.c
>>> b/arch/powerpc/kvm/book3s_64_vio.c
>>> index a160c14..e2228f1 100644
>>> --- a/arch/powerpc/kvm/book3s_64_vio.c
>>> +++ b/arch/powerpc/kvm/book3s_64_vio.c
>>> @@ -341,8 +341,16 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm
>>> *kvm,
>>>
>>> mutex_unlock(&kvm->lock);
>>>
>>> - return anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops,
>>> + ret = anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops,
>>> stt, O_RDWR | O_CLOEXEC);
>>> + if (ret < 0) {
>>> + mutex_lock(&kvm->lock);
>>> + list_del_rcu(&stt->list);
>
>... don't we have to take care of rcu synchronization before freeing it?
>
>>> + mutex_unlock(&kvm->lock);
>>> + kvm_put_kvm(kvm);
>>> + goto fail;
>>> + }
>>> + return ret;
>
>of simply
>
>if (!ret)
> return 0;
>
>mutex_lock(&kvm->lock);
>list_del_rcu(&stt->list);
>mutex_unlock(&kvm->lock);
>kvm_put_kvm(kvm);
>
>
>>
>> It seems to me that it would be better to do the anon_inode_getfd()
>> call before the kvm_get_kvm() call, and go to the fail label if it
>> fails.
>
>I would have suggested to not add it to the list before it has been
>fully created (so nobody can have access to it). But I guess than we
>need another level of protection(e.g. kvm->lock).
>
>Am I missing something, or is kvm_vm_ioctl_create_spapr_tce() racy?
>
>The -EBUSY check is done without any locking, so two parallel creators
>could create an inconsistency, no? Shouldn't this all be protected by
>kvm->lock?
>
>>
>> Paul.
>>
>
>Independent of the fix, I'd suggest the following cleanup.
>
>
>From 979f55083ee965e25827a8743e8a9fdb85231a6f Mon Sep 17 00:00:00 2001
>From: David Hildenbrand <david@xxxxxxxxxx>
>Date: Wed, 23 Aug 2017 10:08:58 +0200
>Subject: [PATCH v1 1/1] KVM: PPC: cleanup kvm_vm_ioctl_create_spapr_tce
>
>Let's simplify error handling.
>
>Signed-off-by: David Hildenbrand <david@xxxxxxxxxx>
>---
> arch/powerpc/kvm/book3s_64_vio.c | 29 +++++++++++------------------
> 1 file changed, 11 insertions(+), 18 deletions(-)
>
>diff --git a/arch/powerpc/kvm/book3s_64_vio.c
>b/arch/powerpc/kvm/book3s_64_vio.c
>index a160c14304eb..6bac49292296 100644
>--- a/arch/powerpc/kvm/book3s_64_vio.c
>+++ b/arch/powerpc/kvm/book3s_64_vio.c
>@@ -295,8 +295,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
>{
> struct kvmppc_spapr_tce_table *stt = NULL;
> unsigned long npages, size;
>- int ret = -ENOMEM;
>- int i;
>+ int i, ret;
>
> if (!args->size)
> return -EINVAL;
>@@ -310,16 +309,13 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
> size = _ALIGN_UP(args->size, PAGE_SIZE >> 3);
> npages = kvmppc_tce_pages(size);
> ret = kvmppc_account_memlimit(kvmppc_stt_pages(npages), true);
>- if (ret) {
>- stt = NULL;
>- goto fail;
>- }
>+ if (ret)
>+ return ret;
>
>- ret = -ENOMEM;
> stt = kzalloc(sizeof(*stt) + npages * sizeof(struct page *),
> GFP_KERNEL);
> if (!stt)
>- goto fail;
>+ return -ENOMEM;
>
> stt->liobn = args->liobn;
> stt->page_shift = args->page_shift;
>@@ -331,7 +327,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
> for (i = 0; i < npages; i++) {
> stt->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO);
> if (!stt->pages[i])
>- goto fail;
>+ goto fail_free;
> }
>
> kvm_get_kvm(kvm);
>@@ -344,15 +340,12 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
> return anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops,
> stt, O_RDWR | O_CLOEXEC);
>
>-fail:
>- if (stt) {
>- for (i = 0; i < npages; i++)
>- if (stt->pages[i])
>- __free_page(stt->pages[i]);
>-
>- kfree(stt);
>- }
>- return ret;
>+fail_free:
>+ for (i = 0; i < npages; i++)
>+ if (stt->pages[i])
>+ __free_page(stt->pages[i]);
>+ kfree(stt);
>+ return -ENOMEM;
> }
>
> static void kvmppc_clear_tce(struct iommu_table *tbl, unsigned long
>entry)
>--
>2.13.5
>
>
>--
>
>Thanks,
>
>David
>

Update patch based on advice from David Hildenbrand and Paul Mackerras

---
arch/powerpc/kvm/book3s_64_vio.c | 13 +++++++++----
1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
index a160c14..517594a 100644
--- a/arch/powerpc/kvm/book3s_64_vio.c
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -334,16 +334,21 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
goto fail;
}

- kvm_get_kvm(kvm);
-
mutex_lock(&kvm->lock);
list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables);

mutex_unlock(&kvm->lock);

- return anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops,
+ ret = anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops,
stt, O_RDWR | O_CLOEXEC);
-
+ if (ret >= 0) {
+ kvm_get_kvm(kvm);
+ return ret;
+ }
+ mutex_lock(&kvm->lock);
+ list_del_rcu(&stt->list);
+ mutex_unlock(&kvm->lock);
+ synchronize_rcu();
fail:
if (stt) {
for (i = 0; i < npages; i++)
--