Re: [PATCH v5 4/5] selftests/resctrl: Cleanup properly when an error occurs in CAT test

From: Reinette Chatre
Date: Wed Jan 18 2023 - 19:57:56 EST


Hi Shaopeng,

On 1/10/2023 11:58 PM, Shaopeng Tan wrote:
> After creating a child process with fork() in CAT test, if there is an

Please delete the "there is" so that it reads "if an error occurs"

> error occurs or a signal such as SIGINT is received, the parent process
> will be terminated immediately, and therefor the child process will not
> be killed and also resctrlfs is not unmounted.
>
> There is a signal handler registered in CMT/MBM/MBA tests, which kills
> child process, unmount resctrlfs, cleanups result files, etc., if a
> signal such as SIGINT is received.
>
> Commonize the signal handler registered for CMT/MBM/MBA tests and reuse
> it in CAT too.
>
> To reuse the signal handler, make the child process in CAT wait to be
> killed by parent process in any case (an error occurred or a signal was
> received), and when killing child process use global bm_pid instead of
> local bm_pid.
>
> Also, since the MBA/MBA/CMT/CAT are run in order, unregister the signal
> handler at the end of each test so that the signal handler cannot be
> inherited by other tests.
>
> Signed-off-by: Shaopeng Tan <tan.shaopeng@xxxxxxxxxxxxxx>
> ---
> tools/testing/selftests/resctrl/cat_test.c | 26 +++++----
> tools/testing/selftests/resctrl/fill_buf.c | 14 -----
> tools/testing/selftests/resctrl/resctrl.h | 2 +
> tools/testing/selftests/resctrl/resctrl_val.c | 56 ++++++++++++++-----
> 4 files changed, 59 insertions(+), 39 deletions(-)
>
> diff --git a/tools/testing/selftests/resctrl/cat_test.c b/tools/testing/selftests/resctrl/cat_test.c
> index 6a8306b0a109..87302b882929 100644
> --- a/tools/testing/selftests/resctrl/cat_test.c
> +++ b/tools/testing/selftests/resctrl/cat_test.c
> @@ -103,7 +103,6 @@ int cat_perf_miss_val(int cpu_no, int n, char *cache_type)
> unsigned long l_mask, l_mask_1;
> int ret, pipefd[2], sibling_cpu_no;
> char pipe_message;
> - pid_t bm_pid;
>
> cache_size = 0;
>
> @@ -181,28 +180,29 @@ int cat_perf_miss_val(int cpu_no, int n, char *cache_type)
> strcpy(param.filename, RESULT_FILE_NAME1);
> param.num_of_runs = 0;
> param.cpu_no = sibling_cpu_no;
> + } else {
> + ret = signal_handler_register();
> + if (ret)
> + goto out;

The "goto" will unregister the signal handler. Is that necessary if
the registration failed?

Also, if signal_handler_register() fails then the child will
keep running and run its test ... would child not then run forever?

> }
>
> remove(param.filename);
>
> ret = cat_val(&param);
> - if (ret)
> - return ret;
> -
> - ret = check_results(&param);
> - if (ret)
> - return ret;
> + if (ret == 0)
> + ret = check_results(&param);
>
> if (bm_pid == 0) {
> /* Tell parent that child is ready */
> close(pipefd[0]);
> pipe_message = 1;
> if (write(pipefd[1], &pipe_message, sizeof(pipe_message)) <
> - sizeof(pipe_message)) {
> - close(pipefd[1]);
> + sizeof(pipe_message))
> + /*
> + * Just print the error message.
> + * Let while(1) run and wait for itself to be killed.
> + */
> perror("# failed signaling parent process");
> - return errno;
> - }
>
> close(pipefd[1]);
> while (1)
> @@ -226,5 +226,7 @@ int cat_perf_miss_val(int cpu_no, int n, char *cache_type)
> if (bm_pid)
> umount_resctrlfs();
>
> - return 0;
> +out:
> + ret = signal_handler_unregister();
> + return ret;

Be careful here ... any earlier value of "ret" will be overwritten with
the result of signal_handler_unregister(). I think the return of
signal_handler_unregister() can be ignored. There will be an
error message if it failed.

> }
> diff --git a/tools/testing/selftests/resctrl/fill_buf.c b/tools/testing/selftests/resctrl/fill_buf.c
> index 56ccbeae0638..322c6812e15c 100644
> --- a/tools/testing/selftests/resctrl/fill_buf.c
> +++ b/tools/testing/selftests/resctrl/fill_buf.c
> @@ -33,14 +33,6 @@ static void sb(void)
> #endif
> }
>
> -static void ctrl_handler(int signo)
> -{
> - free(startptr);
> - printf("\nEnding\n");
> - sb();
> - exit(EXIT_SUCCESS);
> -}
> -
> static void cl_flush(void *p)
> {
> #if defined(__i386) || defined(__x86_64)
> @@ -198,12 +190,6 @@ int run_fill_buf(unsigned long span, int malloc_and_init_memory,
> unsigned long long cache_size = span;
> int ret;
>
> - /* set up ctrl-c handler */
> - if (signal(SIGINT, ctrl_handler) == SIG_ERR)
> - printf("Failed to catch SIGINT!\n");
> - if (signal(SIGHUP, ctrl_handler) == SIG_ERR)
> - printf("Failed to catch SIGHUP!\n");
> -
> ret = fill_cache(cache_size, malloc_and_init_memory, memflush, op,
> resctrl_val);
> if (ret) {
> diff --git a/tools/testing/selftests/resctrl/resctrl.h b/tools/testing/selftests/resctrl/resctrl.h
> index f0ded31fb3c7..14a5e21497e1 100644
> --- a/tools/testing/selftests/resctrl/resctrl.h
> +++ b/tools/testing/selftests/resctrl/resctrl.h
> @@ -107,6 +107,8 @@ void mba_test_cleanup(void);
> int get_cbm_mask(char *cache_type, char *cbm_mask);
> int get_cache_size(int cpu_no, char *cache_type, unsigned long *cache_size);
> void ctrlc_handler(int signum, siginfo_t *info, void *ptr);
> +int signal_handler_register(void);
> +int signal_handler_unregister(void);
> int cat_val(struct resctrl_val_param *param);
> void cat_test_cleanup(void);
> int cat_perf_miss_val(int cpu_no, int no_of_bits, char *cache_type);
> diff --git a/tools/testing/selftests/resctrl/resctrl_val.c b/tools/testing/selftests/resctrl/resctrl_val.c
> index 6948843bf995..91a3cf8b308b 100644
> --- a/tools/testing/selftests/resctrl/resctrl_val.c
> +++ b/tools/testing/selftests/resctrl/resctrl_val.c
> @@ -476,6 +476,46 @@ void ctrlc_handler(int signum, siginfo_t *info, void *ptr)
> exit(EXIT_SUCCESS);
> }
>
> +struct sigaction sigact;
> +
> +/*
> + * Register CTRL-C handler for parent, as it has to kill
> + * child process before exiting
> + */
> +int signal_handler_register(void)
> +{
> + int ret = 0;
> + struct sigaction sigact;

Why is there a global sigact as well as a local sigact?

Also, please do keep using reverse fir-tree format.

> +
> + sigact.sa_sigaction = ctrlc_handler;
> + sigemptyset(&sigact.sa_mask);
> + sigact.sa_flags = SA_SIGINFO;
> + if (sigaction(SIGINT, &sigact, NULL) ||
> + sigaction(SIGTERM, &sigact, NULL) ||
> + sigaction(SIGHUP, &sigact, NULL)) {
> + perror("# sigaction");
> + ret = errno;

I understand that you copied from the original code here but I
do think there is an issue here in that errno is undefined after
a library call. perror() will print errno message so keeping
it (errno) around may not be useful. Please do keep the custom
of returning negative value as error though. I think just
returning -1 would work.

> + }
> + return ret;
> +}
> +
> +/* reset signal handler to SIG_DFL. */
> +int signal_handler_unregister(void)
> +{
> + int ret = 0;
> + struct sigaction sigact;
> +
> + sigact.sa_handler = SIG_DFL;
> + sigemptyset(&sigact.sa_mask);
> + if (sigaction(SIGINT, &sigact, NULL) ||
> + sigaction(SIGTERM, &sigact, NULL) ||
> + sigaction(SIGHUP, &sigact, NULL)) {
> + perror("# sigaction");
> + ret = errno;

Same comment about errno and returning -1 on failure.

> + }
> + return ret;
> +}
> +
> /*
> * print_results_bw: the memory bandwidth results are stored in a file
> * @filename: file that stores the results
> @@ -671,20 +711,9 @@ int resctrl_val(char **benchmark_cmd, struct resctrl_val_param *param)
>
> ksft_print_msg("Benchmark PID: %d\n", bm_pid);
>
> - /*
> - * Register CTRL-C handler for parent, as it has to kill benchmark
> - * before exiting
> - */
> - sigact.sa_sigaction = ctrlc_handler;
> - sigemptyset(&sigact.sa_mask);
> - sigact.sa_flags = SA_SIGINFO;
> - if (sigaction(SIGINT, &sigact, NULL) ||
> - sigaction(SIGTERM, &sigact, NULL) ||
> - sigaction(SIGHUP, &sigact, NULL)) {
> - perror("# sigaction");
> - ret = errno;
> + ret = signal_handler_register();
> + if (ret)
> goto out;
> - }
>
> value.sival_ptr = benchmark_cmd;
>
> @@ -764,6 +793,7 @@ int resctrl_val(char **benchmark_cmd, struct resctrl_val_param *param)
> out:
> kill(bm_pid, SIGKILL);
> umount_resctrlfs();
> + ret = signal_handler_unregister();
>
> return ret;

Same here ... any earlier value of ret will be overwritten with result
of signal_handler_unregister().


Reinette

> }