Re: [PATCH] tracer for sys_open() - sreadahead

From: Ananth N Mavinakayanahalli
Date: Wed Jan 28 2009 - 04:38:35 EST


On Tue, Jan 27, 2009 at 11:43:05PM +0100, Frederic Weisbecker wrote:
> On Tue, Jan 27, 2009 at 12:08:04PM -0800, Kok, Auke wrote:
> >
> > This tracer monitors regular file open() syscalls. This is a fast
> > and low-overhead alternative to strace, and does not allow or
> > require to be attached to every process.
> >
> > The tracer only logs successful calls, as those are the only ones we
> > are currently interested in, and we can determine the absolute path
> > of these files as we log.
> >
> > Signed-off-by: Auke Kok <auke-jan.h.kok@xxxxxxxxx>
>
>
> Hi Auke,
>
> Speaking about a global syscall tracer, I made a patch to trace only the syscalls
> with the function-graph-tracer.
>
> http://lkml.org/lkml/2008/12/30/267
>
> Its approach and purpose is different than a tracer dedicated only to syscalls.
> The function graph tracer traces execution graph of the functions and is more about
> execution time spent and code flow whereas a syscall tracer can provide more specific
> information about syscalls.
>
> So the two do not overlap.
>
> But the low level part of my patch creates a thread flag _TIF_SYSCALL_TRACE which triggers
> a ptrace hook when set.
> This low-level part can easily be used by all tracers that would like to inspect syscalls.
>
> Just a change is needed: Steven requested that the part inside syscall_trace_enter become
> a tracepoint, making it totally shareable between tracers and easy to turn on and off.
>
> And perhaps the parts that set/clear the flag on all tasks can be shared too.
>
> So we can start with this low-level syscall tracing facility. If you want, I can adapt
> this low-level part and submit a patch this week or the next one to give you this base
> infrastructure.
>
>
> Once we have it, I think a syscall tracer can be fed with new syscalls events through
> several patch iterations, starting with the open and close one :-)

Here is something I did some time ago that uses utrace. It is per-task,
doesn't use ftrace, and is just intended as a prototype. It traces both
syscall entries and returns.

---

Here are the beginnings of a simple utrace-based strace. Right now, one
needs to load this module with 'insmod <modname> tid=<tid>'.

The output looks something like this:
[267352.641112] Attached to 32604 => 0xd8f60090
[267353.981046] 2
[267353.981085] 197 0x1 0xbff8cd84 0x86cff4 0x86d4c0 0x86d4c0 0xbff8cd50a = 0
[267353.981097] 192 0x0 0x1000 0x3 0x22 0xffffffff 0x0 = b7f8d000
[267353.981124] 4 0x1 0xb7f8d000 0xd 0xd 0xb7f8d000 0xbff8cda8 = d
[267353.981174] 4 0x1 0xb7f8d000 0x3 0x3 0xb7f8d000 0xbff8c7dc = 3
[267353.981209] 252 0x0 0x0 0x86e0d0 0x0 0x86b274 0xbff8cee8 =
[267353.981215] Task 32604 exited
[267355.460180] Cannot find PID 32604

I know strace does a pretty print, but this is a quick and dirty
prototype.
---

#include <linux/module.h>
#include <linux/utrace.h>
#include <linux/err.h>
#include <linux/pid.h>
#include <linux/sched.h>
#include <asm/syscall.h>

/* Module metadata. */
MODULE_DESCRIPTION("syscall trace");
MODULE_LICENSE("GPL");

/*
 * Numeric id of the task to trace.  It is looked up with find_get_pid()
 * both when the module is loaded and when it is unloaded.
 */
static int target_tid;

/*
 * Expose target_tid as the "tid" module parameter (insmod <mod> tid=<tid>).
 * Permission mask 0: the parameter is load-time only.
 */
module_param_named(tid, target_tid, int, 0);

/*
 * Syscall-entry callback (wired up as .report_syscall_entry in syscall_ops).
 *
 * Logs the syscall number followed by its six argument slots in hex.  The
 * line is deliberately left unterminated, ending in " = ", so that the
 * syscall-exit callback can append the return value and the newline.
 *
 * Always returns UTRACE_RESUME.
 */
static u32 task_syscall_entry(u32 action, struct utrace_attached_engine *engine,
			      struct task_struct *task, struct pt_regs *regs)
{
	unsigned long argv[6];
	long nr = syscall_get_nr(task, regs);

	/* Fetch all six argument slots, starting from slot 0. */
	syscall_get_arguments(task, regs, 0, 6, argv);

	printk(KERN_INFO "%ld 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx = ",
	       nr, argv[0], argv[1], argv[2], argv[3], argv[4], argv[5]);

	return UTRACE_RESUME;
}

/*
 * Syscall-exit callback (wired up as .report_syscall_exit in syscall_ops).
 *
 * Prints the syscall's return value in hex followed by a newline, which
 * finishes the line started by the syscall-entry callback.
 *
 * Always returns UTRACE_RESUME.
 */
static u32 task_syscall_exit(u32 action, struct utrace_attached_engine *engine,
			     struct task_struct *task, struct pt_regs *regs)
{
	long rc;

	rc = syscall_get_return_value(task, regs);
	printk("%lx\n", rc);

	return UTRACE_RESUME;
}

/*
 * Task-exit callback (wired up as .report_exit in syscall_ops).
 *
 * Emits a bare newline first: a syscall-entry line may still be open if the
 * task exits inside a syscall that never reports an exit.  Then logs that
 * the traced task is gone and asks utrace to detach this engine.
 *
 * Note that the id printed is the module parameter target_tid, not a value
 * read from @task.
 */
static u32 task_exit(enum utrace_resume_action action,
		     struct utrace_attached_engine *engine,
		     struct task_struct *task,
		     long orig_code, long *code)
{
	/* Terminate any partially written syscall line. */
	printk("\n");

	printk(KERN_INFO "Task %d exited\n", target_tid);

	return UTRACE_DETACH;
}

/*
 * Callback table handed to utrace when attaching to the target.  Its address
 * is also what strace_exit() matches on (UTRACE_ATTACH_MATCH_OPS) to find
 * the engine again at unload time.
 */
static const struct utrace_engine_ops syscall_ops = {
	.report_syscall_entry	= task_syscall_entry,	/* log nr + args */
	.report_syscall_exit	= task_syscall_exit,	/* log return value */
	.report_exit		= task_exit,		/* log exit, detach */
};

/*
 * Module init: attach a utrace engine to the task named by the "tid"
 * parameter and enable syscall and exit reporting on it.
 *
 * Returns 0 on success, or a negative errno:
 *   -ESRCH   no pid / task matches target_tid
 *   -EINVAL  utrace_attach_pid() returned NULL
 *   other    error reported by utrace_attach_pid() or utrace_set_events()
 *
 * Reference handling: the struct pid reference taken by find_get_pid() is
 * dropped on every path, and the task is pinned with get_pid_task() for as
 * long as its pointer is passed to utrace.
 */
static int __init strace_init(void)
{
	struct utrace_attached_engine *engine;
	struct task_struct *target;
	struct pid *pid;
	int ret;

	pid = find_get_pid(target_tid);
	if (pid == NULL) {
		printk(KERN_ERR "Cannot find PID %d\n", target_tid);
		return -ESRCH;
	}

	engine = utrace_attach_pid(pid, UTRACE_ATTACH_CREATE, &syscall_ops, 0);
	if (IS_ERR(engine)) {
		printk(KERN_ERR "utrace_attach_pid: %ld\n", PTR_ERR(engine));
		/* Hand the real reason back to insmod. */
		ret = PTR_ERR(engine);
		goto out_put_pid;
	}
	if (engine == NULL) {
		printk(KERN_ERR "utrace_attach_pid => NULL\n");
		ret = -EINVAL;
		goto out_put_pid;
	}
	printk(KERN_INFO "Attached to %d => 0x%p\n", target_tid, engine);

	/*
	 * Take a real reference on the task instead of a bare pid_task()
	 * lookup: the pointer is used below outside any RCU read-side
	 * section, and the target may exit at any moment.
	 */
	target = get_pid_task(pid, PIDTYPE_PID);
	if (target == NULL) {
		/*
		 * The task vanished between attach and lookup.
		 * NOTE(review): presumably utrace discards the engine when
		 * the task is reaped -- confirm against the utrace docs.
		 */
		printk(KERN_ERR "Cannot find PID %d\n", target_tid);
		ret = -ESRCH;
		goto out_put_pid;
	}

	ret = utrace_set_events(target, engine, UTRACE_EVENT_SYSCALL |
				UTRACE_EVENT(EXIT));
	if (ret) {
		printk(KERN_ERR "utrace_set_events returned %d\n", ret);
		/*
		 * A non-zero return fails the module load, so do not leave
		 * an engine behind whose ops point into this module.
		 */
		utrace_control(target, engine, UTRACE_DETACH);
	}

	put_task_struct(target);
out_put_pid:
	put_pid(pid);
	return ret;
}

/*
 * Module exit: look the target task up again, find the engine that was
 * attached with syscall_ops and detach it.
 *
 * If the task has already exited there is nothing to do: either the pid can
 * no longer be found, or the exit callback has already detached the engine
 * and the ops match below fails.
 *
 * NOTE(review): a non-zero return from utrace_control() is only logged.  If
 * it can indicate that a callback is still running, unloading must wait for
 * it to finish -- confirm against the utrace documentation.
 */
static void __exit strace_exit(void)
{
	struct utrace_attached_engine *engine;
	struct task_struct *target;
	struct pid *pid;
	int ret;

	pid = find_get_pid(target_tid);
	if (pid == NULL) {
		printk(KERN_ERR "Cannot find PID %d\n", target_tid);
		return;
	}

	/*
	 * Pin the task: a struct pid can exist with no task attached to it,
	 * and a bare pid_task() result is not safe to use once we are
	 * outside RCU protection.
	 */
	target = get_pid_task(pid, PIDTYPE_PID);
	put_pid(pid);
	if (target == NULL) {
		printk(KERN_ERR "Cannot find PID %d\n", target_tid);
		return;
	}

	/* MATCH_OPS without CREATE: look up our existing engine only. */
	engine = utrace_attach_task(target, UTRACE_ATTACH_MATCH_OPS,
				    &syscall_ops, 0);
	if (IS_ERR(engine)) {
		printk(KERN_ERR "Can't find self: %ld\n", PTR_ERR(engine));
	} else if (engine == NULL) {
		printk(KERN_ERR "Can't find self: no match\n");
	} else {
		printk(KERN_INFO "Trying detach 0x%p from %d\n",
		       engine, target_tid);
		ret = utrace_control(target, engine, UTRACE_DETACH);
		if (ret)
			printk(KERN_ERR "utrace_control returned %d\n",
			       ret);
	}

	put_task_struct(target);
}

module_init(strace_init);
module_exit(strace_exit);
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/