Hi Sumanth, Sorry for the late response. I just came back from Sabbatical yesterday. On 2025-08-06 1:37 a.m., Sumanth Korikkar wrote: > On Wed, Jul 23, 2025 at 10:06:26AM +0200, Sumanth Korikkar wrote: >> On Tue, May 20, 2025 at 11:16:35AM -0700, kan.liang@xxxxxxxxxxxxxxx wrote: >>> From: Kan Liang <kan.liang@xxxxxxxxxxxxxxx> >>> >>> The throttle support has been added in the generic code. Remove >>> the driver-specific throttle support. >>> >>> Besides the throttle, perf_event_overflow may return true because of >>> event_limit. It already does an inatomic event disable. The pmu->stop >>> is not required either. >>> >>> Tested-by: Thomas Richter <tmricht@xxxxxxxxxxxxx> >>> Signed-off-by: Kan Liang <kan.liang@xxxxxxxxxxxxxxx> >>> Cc: Thomas Richter <tmricht@xxxxxxxxxxxxx> >>> Cc: linux-s390@xxxxxxxxxxxxxxx >>> --- >>> arch/s390/kernel/perf_cpum_cf.c | 2 -- >>> arch/s390/kernel/perf_cpum_sf.c | 5 +---- >>> 2 files changed, 1 insertion(+), 6 deletions(-) >>> >>> diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c >>> index e657fad7e376..6a262e198e35 100644 >>> --- a/arch/s390/kernel/perf_cpum_cf.c >>> +++ b/arch/s390/kernel/perf_cpum_cf.c >>> @@ -980,8 +980,6 @@ static int cfdiag_push_sample(struct perf_event *event, >>> } >>> >>> overflow = perf_event_overflow(event, &data, ®s); >>> - if (overflow) >>> - event->pmu->stop(event, 0); >>> >>> perf_event_update_userpage(event); >>> return overflow; >>> diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c >>> index ad22799d8a7d..91469401f2c9 100644 >>> --- a/arch/s390/kernel/perf_cpum_sf.c >>> +++ b/arch/s390/kernel/perf_cpum_sf.c >>> @@ -1072,10 +1072,7 @@ static int perf_push_sample(struct perf_event *event, >>> overflow = 0; >>> if (perf_event_exclude(event, ®s, sde_regs)) >>> goto out; >>> - if (perf_event_overflow(event, &data, ®s)) { >>> - overflow = 1; >>> - event->pmu->stop(event, 0); >>> - } >>> + overflow = perf_event_overflow(event, &data, ®s); >>> perf_event_update_userpage(event); >>> out: >>> return overflow; >>> -- >>> 2.38.1 >> >> Hi all, >> >> This seems to break POLL_HUP delivery to userspace - when event_limit reaches 0 >> >> From perf_event_open man page: >> PERF_EVENT_IOC_REFRESH >> Non-inherited overflow counters can use this to enable a >> counter for a number of overflows specified by the >> argument, after which it is disabled. Subsequent calls of >> this ioctl add the argument value to the current count. An >> overflow notification with POLL_IN set will happen on each >> overflow until the count reaches 0; when that happens a >> notification with POLL_HUP set is sent and the event is >> disabled. >> >> When the event_limit reaches 0, the POLL_HUP signal is expected to be >> sent. Prior to this patch, an explicit call to event->stop() was made, >> which may have contributed to ensuring that the POLL_HUP signal was >> ultimately delivered. However, after this change, I often did not >> observe the POLL_HUP signal being delivered as expected in the end The event_limit case also returns 1. I missed it when fixing the throttle issue. :( I didn't use the IOC_REFRESH before. According to the kernel code, it reschedules all the events of the event->pmu, when the ioctl is invoked. So we just need to move the event->pmu->stop() to the generic code as below. It should keep the behavior unchanged. Could you please try the below fix? diff --git a/kernel/events/core.c b/kernel/events/core.c index 14ae43694833..f492cbcd3bb6 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -10341,6 +10341,7 @@ static int __perf_event_overflow(struct perf_event *event, ret = 1; event->pending_kill = POLL_HUP; perf_event_disable_inatomic(event); + event->pmu->stop(event, 0); } if (event->attr.sigtrap) { Thanks, Kan >> >> Example program: >> output: >> Computation result: 49951804672 >> count.hup: 0 count.pollin: 22 >> >> Expected output should be: >> count.hup: 1 in the end >> >> #define _GNU_SOURCE >> #include <time.h> >> #include <stdbool.h> >> #include <signal.h> >> #include <poll.h> >> #include <fcntl.h> >> #include <stdio.h> >> #include <stdlib.h> >> #include <string.h> >> #include <unistd.h> >> #include <time.h> >> >> #include <sys/ioctl.h> >> #include <sys/syscall.h> >> #include <linux/perf_event.h> >> >> static struct signal_counts { >> int in; >> int out; >> int hup; >> int unknown; >> } count; >> >> >> static unsigned long sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID | >> PERF_SAMPLE_TIME | PERF_SAMPLE_ADDR | PERF_SAMPLE_READ | >> PERF_SAMPLE_ID | PERF_SAMPLE_CPU | >> PERF_SAMPLE_PERIOD | PERF_SAMPLE_STREAM_ID | PERF_SAMPLE_RAW; >> >> static void sighandler(int signum, siginfo_t *info, void *uc) >> { >> switch(info->si_code) { >> case POLL_IN: count.in++; break; >> case POLL_OUT: count.out++; break; >> case POLL_HUP: count.hup++; break; >> default: count.unknown++; break; >> } >> } >> >> void generate_load(unsigned long long iterations) { >> unsigned long long sum = 0; >> srand(time(0)); >> >> for (unsigned long long i = 0; i < iterations; ++i) { >> int rnd = rand(); >> sum += (rnd ^ (rnd >> 3)) % 1000; >> } >> printf("Computation result: %llu\n", sum); >> } >> >> void perf_attr(struct perf_event_attr *pe, >> unsigned long config, unsigned long period, bool freq, >> unsigned long bits) >> { >> memset(pe, 0, sizeof(struct perf_event_attr)); >> pe->size = sizeof(struct perf_event_attr); >> pe->type = PERF_TYPE_HARDWARE; >> pe->config = PERF_COUNT_HW_CPU_CYCLES; >> pe->exclude_kernel = 0; >> pe->sample_period = 50000; >> pe->freq = 1; >> pe->disabled = 1; >> pe->config = config; >> pe->freq = freq; >> pe->sample_type = bits; >> } >> >> int main(int argc, char **argv) >> { >> int fd, signo = SIGIO, rc = -1; >> struct sigaction sa, sa_old; >> struct perf_event_attr pe; >> >> perf_attr(&pe, PERF_COUNT_HW_CPU_CYCLES, 50000, 1, sample_type); >> /* Set up overflow handler */ >> memset(&sa, 0, sizeof(struct sigaction)); >> memset(&sa_old, 0, sizeof(struct sigaction)); >> sa.sa_sigaction = sighandler; >> sa.sa_flags = SA_SIGINFO; >> if (sigaction(signo, &sa, &sa_old) < 0) >> goto out; >> >> fd = syscall(__NR_perf_event_open, &pe, 0, -1, -1, 0); >> if (fd < 0) >> return rc; >> >> rc = fcntl(fd, F_SETFL, O_RDWR | O_NONBLOCK | O_ASYNC); >> rc |= fcntl(fd, F_SETSIG, signo); >> rc |= fcntl(fd, F_SETOWN, getpid()); >> if (rc) >> goto out; >> >> rc = ioctl(fd, PERF_EVENT_IOC_REFRESH, 2500); >> if (rc) >> goto out; >> >> generate_load(100000000ULL); >> sigaction(signo, &sa_old, NULL); >> printf("count.hup: %d count.pollin: %d\n", count.hup, count.in); >> close(fd); >> return 0; >> out: >> return rc; >> } > > Hi Kan, > > It would be great if you could share your feedback on this issue. > > Thank you.