On 5/26/2025 5:17 PM, David Hildenbrand wrote:
> On 20.05.25 12:28, Chenyi Qiang wrote:
>> The current error handling is simple, with the following assumptions:
>> - QEMU will quit instead of resuming the guest if kvm_convert_memory()
>>   fails, so no rollback is needed.
>> - The range to convert is required to be entirely in the desired
>>   state; handling a mixture of states is not allowed.
>> - The conversion from shared to private is a non-failure operation.
>>
>> This is sufficient for now, as complex error handling is not required.
>> For future extension, add some potential error handling:
>> - For private-to-shared conversion, roll back if
>>   ram_block_attribute_notify_to_populated() fails.
>> - For shared-to-private conversion, still assert that it is a
>>   non-failure operation for now. It could become a failure path with
>>   in-place conversion, which will likely have to retry the conversion
>>   until it succeeds.
>> - For the mixture case, process individual blocks for ease of
>>   rollback.
>>
>> Signed-off-by: Chenyi Qiang <chenyi.qiang@xxxxxxxxx>
>> ---
>>   system/ram-block-attribute.c | 116 +++++++++++++++++++++++++++--------
>>   1 file changed, 90 insertions(+), 26 deletions(-)
>>
>> diff --git a/system/ram-block-attribute.c b/system/ram-block-attribute.c
>> index 387501b569..0af3396aa4 100644
>> --- a/system/ram-block-attribute.c
>> +++ b/system/ram-block-attribute.c
>> @@ -289,7 +289,12 @@ static int ram_block_attribute_notify_to_discard(RamBlockAttribute *attr,
>>           }
>>           ret = rdl->notify_discard(rdl, &tmp);
>>           if (ret) {
>> -            break;
>> +            /*
>> +             * The current to_private listeners (VFIO dma_unmap and
>> +             * KVM set_attribute_private) are non-failing operations.
>> +             * TODO: add rollback operations if it is allowed to fail.
>> +             */
>> +            g_assert_not_reached();
>>           }
>>       }
>>
>
> If it's not allowed to fail for now, then patch #8 does not make sense
> and should be dropped :)

It was intended as a future extension: with in-place conversion,
to_private is allowed to fail, which is why I added patch #8. But as
you mentioned, the conversion path is changing, and it may be easier to
handle this from the KVM code directly. Let me drop patch #8 and wait
for in-place conversion to mature.

>
> The implementations (vfio) should likely exit() instead on unexpected
> errors when discarding.

After dropping patch #8, maybe keep the vfio discard handling as it
was. Adding an explicit exit() is also OK with me, since discard is the
non-failure case.

>
>
> Why not squash all the below into the corresponding patch? Looks mostly
> like handling partial conversions correctly (as discussed previously)?

I extracted these two kinds of handling, 1) mixture conversion and
2) operation rollback, into this separate patch because they are not
practical cases and are untested.

For 1), I still don't see any real case that converts a range with
mixed attributes.

For 2), a failing memory conversion today (see kvm_cpu_exec() ->
kvm_convert_memory()) causes QEMU to quit instead of resuming the
guest, so doing the rollback seems useless at present.
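For reference, the quit path I mean is roughly the following
(paraphrased and trimmed from kvm_cpu_exec() in accel/kvm/kvm-all.c;
the exact flag handling may differ across trees):

    case KVM_EXIT_MEMORY_FAULT:
        ret = kvm_convert_memory(run->memory_fault.gpa,
                                 run->memory_fault.size,
                                 run->memory_fault.flags &
                                 KVM_MEMORY_EXIT_FLAG_PRIVATE);
        break;
    ...
    /* after the exit-reason switch */
    if (ret < 0) {
        cpu_dump_state(cpu, stderr, CPU_DUMP_CODE);
        vm_stop(RUN_STATE_INTERNAL_ERROR);
    }

So any state we carefully rolled back would never be observed by a
running guest today.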
>
>> @@ -300,7 +305,7 @@ static int ram_block_attribute_notify_to_populated(RamBlockAttribute *attr,
>>                                            uint64_t offset, uint64_t size)
>>   {
>> -    RamDiscardListener *rdl;
>> +    RamDiscardListener *rdl, *rdl2;
>>       int ret = 0;
>>
>>       QLIST_FOREACH(rdl, &attr->rdl_list, next) {
>> @@ -315,6 +320,20 @@ ram_block_attribute_notify_to_populated(RamBlockAttribute *attr,
>>           }
>>       }
>>
>> +    if (ret) {
>> +        /* Notify all already-notified listeners. */
>> +        QLIST_FOREACH(rdl2, &attr->rdl_list, next) {
>> +            MemoryRegionSection tmp = *rdl2->section;
>> +
>> +            if (rdl == rdl2) {
>> +                break;
>> +            }
>> +            if (!memory_region_section_intersect_range(&tmp, offset, size)) {
>> +                continue;
>> +            }
>> +            rdl2->notify_discard(rdl2, &tmp);
>> +        }
>> +    }
>>       return ret;
>>   }
>>
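A note on the rollback loop above, since this path is untested: the
pattern reduces to the following standalone sketch (toy listeners
invented for illustration, with listener 2 arbitrarily made to fail).
It should undo exactly the listeners notified before the failing one,
mirroring the rdl == rdl2 cut-off:

    #include <stdio.h>

    #define NUM_LISTENERS 4

    /* Toy stand-ins for notify_populate()/notify_discard(). */
    static int populate(int i) { return (i == 2) ? -1 : 0; }
    static void discard(int i) { printf("rolling back listener %d\n", i); }

    int main(void)
    {
        int i, failed = -1, ret = 0;

        for (i = 0; i < NUM_LISTENERS; i++) {
            ret = populate(i);
            if (ret) {
                failed = i;             /* remember where we stopped */
                break;
            }
        }

        if (ret) {
            /* Undo only the listeners notified before the failing one. */
            for (i = 0; i < NUM_LISTENERS; i++) {
                if (i == failed) {      /* mirrors the rdl == rdl2 check */
                    break;
                }
                discard(i);
            }
        }
        return ret ? 1 : 0;
    }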
>> @@ -353,6 +372,9 @@ int ram_block_attribute_state_change(RamBlockAttribute *attr, uint64_t offset,
>>       const int block_size = ram_block_attribute_get_block_size(attr);
>>       const unsigned long first_bit = offset / block_size;
>>       const unsigned long nbits = size / block_size;
>> +    const uint64_t end = offset + size;
>> +    unsigned long bit;
>> +    uint64_t cur;
>>       int ret = 0;
>>
>>       if (!ram_block_attribute_is_valid_range(attr, offset, size)) {
>> @@ -361,32 +383,74 @@ int ram_block_attribute_state_change(RamBlockAttribute *attr, uint64_t offset,
>>           return -1;
>>       }
>>
>> -    /* Already discard/populated */
>> -    if ((ram_block_attribute_is_range_discard(attr, offset, size) &&
>> -         to_private) ||
>> -        (ram_block_attribute_is_range_populated(attr, offset, size) &&
>> -         !to_private)) {
>> -        return 0;
>> -    }
>> -
>> -    /* Unexpected mixture */
>> -    if ((!ram_block_attribute_is_range_populated(attr, offset, size) &&
>> -         to_private) ||
>> -        (!ram_block_attribute_is_range_discard(attr, offset, size) &&
>> -         !to_private)) {
>> -        error_report("%s, the range is not all in the desired state: "
>> -                     "(offset 0x%lx, size 0x%lx), %s",
>> -                     __func__, offset, size,
>> -                     to_private ? "private" : "shared");
>> -        return -1;
>> -    }
>> -
>>       if (to_private) {
>> -        bitmap_clear(attr->bitmap, first_bit, nbits);
>> -        ret = ram_block_attribute_notify_to_discard(attr, offset, size);
>> +        if (ram_block_attribute_is_range_discard(attr, offset, size)) {
>> +            /* Already private */
>> +        } else if (!ram_block_attribute_is_range_populated(attr, offset,
>> +                                                           size)) {
>> +            /* Unexpected mixture: process individual blocks */
>> +            for (cur = offset; cur < end; cur += block_size) {
>> +                bit = cur / block_size;
>> +                if (!test_bit(bit, attr->bitmap)) {
>> +                    continue;
>> +                }
>> +                clear_bit(bit, attr->bitmap);
>> +                ram_block_attribute_notify_to_discard(attr, cur, block_size);
>> +            }
>> +        } else {
>> +            /* Completely shared */
>> +            bitmap_clear(attr->bitmap, first_bit, nbits);
>> +            ram_block_attribute_notify_to_discard(attr, offset, size);
>> +        }
>>       } else {
>> -        bitmap_set(attr->bitmap, first_bit, nbits);
>> -        ret = ram_block_attribute_notify_to_populated(attr, offset, size);
>> +        if (ram_block_attribute_is_range_populated(attr, offset, size)) {
>> +            /* Already shared */
>> +        } else if (!ram_block_attribute_is_range_discard(attr, offset, size)) {
>> +            /* Unexpected mixture: process individual blocks */
>> +            unsigned long *modified_bitmap = bitmap_new(nbits);
>> +
>> +            for (cur = offset; cur < end; cur += block_size) {
>> +                bit = cur / block_size;
>> +                if (test_bit(bit, attr->bitmap)) {
>> +                    continue;
>> +                }
>> +                set_bit(bit, attr->bitmap);
>> +                ret = ram_block_attribute_notify_to_populated(attr, cur,
>> +                                                              block_size);
>> +                if (!ret) {
>> +                    set_bit(bit - first_bit, modified_bitmap);
>> +                    continue;
>> +                }
>> +                clear_bit(bit, attr->bitmap);
>> +                break;
>> +            }
>> +
>> +            if (ret) {
>> +                /*
>> +                 * Very unexpected: something went wrong. Revert to the old
>> +                 * state, marking only the blocks as private that we converted
>> +                 * to shared.
>> +                 */
>> +                for (cur = offset; cur < end; cur += block_size) {
>> +                    bit = cur / block_size;
>> +                    if (!test_bit(bit - first_bit, modified_bitmap)) {
>> +                        continue;
>> +                    }
>> +                    assert(test_bit(bit, attr->bitmap));
>> +                    clear_bit(bit, attr->bitmap);
>> +                    ram_block_attribute_notify_to_discard(attr, cur,
>> +                                                          block_size);
>> +                }
>> +            }
>> +            g_free(modified_bitmap);
>> +        } else {
>> +            /* Completely private */
>> +            bitmap_set(attr->bitmap, first_bit, nbits);
>> +            ret = ram_block_attribute_notify_to_populated(attr, offset, size);
>> +            if (ret) {
>> +                bitmap_clear(attr->bitmap, first_bit, nbits);
>> +            }
>> +        }
>>       }
>>
>>       return ret;
>
>
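One more note for the future: once in-place conversion lands and
to_private can fail, the assert in ram_block_attribute_notify_to_discard()
would presumably turn into a retry loop. A hypothetical shape (the
-EAGAIN contract is invented here, not an existing listener API):

    do {
        ret = rdl->notify_discard(rdl, &tmp);
    } while (ret == -EAGAIN);
    g_assert(!ret);  /* still treat persistent failure as fatal */

That matches the "retry the conversion until it succeeds" direction in
the commit message.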