本文为看雪论坛优秀文章
看雪论坛作者ID:有毒
三、sys_ptrace函数源码分析
/*
* Note that this implementation of ptrace behaves differently from vanilla
* ptrace. Contrary to what the man page says, in the PTRACE_PEEKTEXT,
* PTRACE_PEEKDATA, and PTRACE_PEEKUSER requests the data variable is not
* ignored. Instead, the data variable is expected to point at a location
* (in user space) where the result of the ptrace call is written (instead of
* being returned).
*/
asmlinkage int sys_ptrace(long request, long pid, long addr, long data)
{
struct task_struct *child;
struct user * dummy = NULL;
int i, ret;
lock_kernel();
ret = -EPERM;
if (request == PTRACE_TRACEME) { // 请求为PTRACE_TRACEME
/* 检查是否做好被跟踪的准备 */
if (current->ptrace & PT_PTRACED)
goto out;
ret = security_ptrace(current->parent, current);
if (ret)
goto out;
/* 检查通过,在process flags中设置ptrace位*/
current->ptrace |= PT_PTRACED;
ret = 0;
goto out;
}
/* 非PTRACE_TRACEME的请求*/
ret = -ESRCH; // 首先设置返回值为ESRCH,表明没有该进程,宏定义在errno-base.h头文件中
read_lock(&tasklist_lock);
child = find_task_by_pid(pid); // 查找task结构
if (child)
get_task_struct(child);
read_unlock(&tasklist_lock);
if (!child) // 没有找到task结构,指明所给pid错误
goto out;
ret = -EPERM; // 返回操作未授权
if (pid == 1) // init进程不允许被调试
goto out_tsk;
/* 请求为 PTRACE_ATTACH 时*/
if (request == PTRACE_ATTACH) {
ret = ptrace_attach(child); // 进行attach
goto out_tsk;
}
/* 检查进程是否被跟踪,没有的话不能执行其他功能;
* 当不是PTRACE_KILL时,要求进程状态为TASK_STOPPED;
* 被跟踪进程必须为当前进程的子进程
* 在之前是直接在该代码处实现以上逻辑,现在重新将以上功能封装成了ptrace_check_attach函数
*/
ret = ptrace_check_attach(child, request == PTRACE_KILL);
if (ret < 0)
goto out_tsk;
/* 以下就为根据不同的request参数进行对应的处理了,用一个switch来总括,流程比较简单。*/
switch (request) {
/* when I and D space are separate, these will need to be fixed. 这算预告吗?23333*/
case PTRACE_PEEKTEXT: /* read word at location addr. */
case PTRACE_PEEKDATA: {
unsigned long tmp;
int copied;
copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0);
ret = -EIO; // 返回I/O错误
if (copied != sizeof(tmp))
break;
ret = put_user(tmp,(unsigned long *) data);
break;
}
/* read the word at location addr in the USER area. */
case PTRACE_PEEKUSR: {
unsigned long tmp;
ret = -EIO;
if ((addr & 3) || addr < 0 ||
addr > sizeof(struct user) - 3)
break;
tmp = 0; /* Default return condition */
if(addr < FRAME_SIZE*sizeof(long))
tmp = getreg(child, addr);
if(addr >= (long) &dummy->u_debugreg[0] &&
addr <= (long) &dummy->u_debugreg[7]){
addr -= (long) &dummy->u_debugreg[0];
addr = addr >> 2;
tmp = child->thread.debugreg[addr];
}
ret = put_user(tmp,(unsigned long *) data);
break;
}
/* when I and D space are separate, this will have to be fixed. */
case PTRACE_POKETEXT: /* write the word at location addr. */
case PTRACE_POKEDATA:
ret = 0;
if (access_process_vm(child, addr, &data, sizeof(data), 1) == sizeof(data))
break;
ret = -EIO;
break;
case PTRACE_POKEUSR: /* write the word at location addr in the USER area */
ret = -EIO;
if ((addr & 3) || addr < 0 ||
addr > sizeof(struct user) - 3)
break;
if (addr < FRAME_SIZE*sizeof(long)) {
ret = putreg(child, addr, data);
break;
}
/* We need to be very careful here. We implicitly
want to modify a portion of the task_struct, and we
have to be selective about what portions we allow someone
to modify. */
ret = -EIO;
if(addr >= (long) &dummy->u_debugreg[0] &&
addr <= (long) &dummy->u_debugreg[7]){
if(addr == (long) &dummy->u_debugreg[4]) break;
if(addr == (long) &dummy->u_debugreg[5]) break;
if(addr < (long) &dummy->u_debugreg[4] &&
((unsigned long) data) >= TASK_SIZE-3) break;
if(addr == (long) &dummy->u_debugreg[7]) {
data &= ~DR_CONTROL_RESERVED;
for(i=0; i<4; i++)
if ((0x5f54 >> ((data >> (16 + 4*i)) & 0xf)) & 1)
goto out_tsk;
}
addr -= (long) &dummy->u_debugreg;
addr = addr >> 2;
child->thread.debugreg[addr] = data;
ret = 0;
}
break;
case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */
case PTRACE_CONT: { /* restart after signal. */
long tmp;
ret = -EIO;
if ((unsigned long) data > _NSIG)
break;
if (request == PTRACE_SYSCALL) {
set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
}
else {
clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
}
child->exit_code = data;
/* make sure the single step bit is not set. */
tmp = get_stack_long(child, EFL_OFFSET) & ~TRAP_FLAG;
put_stack_long(child, EFL_OFFSET,tmp);
wake_up_process(child);
ret = 0;
break;
}
/*
* make the child exit. Best I can do is send it a sigkill.
* perhaps it should be put in the status that it wants to
* exit.
*/
case PTRACE_KILL: {
long tmp;
ret = 0;
if (child->state == TASK_ZOMBIE) /* already dead */
break;
child->exit_code = SIGKILL;
/* make sure the single step bit is not set. */
tmp = get_stack_long(child, EFL_OFFSET) & ~TRAP_FLAG;
put_stack_long(child, EFL_OFFSET, tmp);
wake_up_process(child);
break;
}
case PTRACE_SINGLESTEP: { /* set the trap flag. */
long tmp;
ret = -EIO;
if ((unsigned long) data > _NSIG)
break;
clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
if ((child->ptrace & PT_DTRACE) == 0) {
/* Spurious delayed TF traps may occur */
child->ptrace |= PT_DTRACE;
}
tmp = get_stack_long(child, EFL_OFFSET) | TRAP_FLAG;
put_stack_long(child, EFL_OFFSET, tmp);
child->exit_code = data;
/* give it a chance to run. */
wake_up_process(child);
ret = 0;
break;
}
case PTRACE_DETACH:
/* detach a process that was attached. */
ret = ptrace_detach(child, data);
break;
case PTRACE_GETREGS: { /* Get all gp regs from the child. */
if (!access_ok(VERIFY_WRITE, (unsigned *)data, FRAME_SIZE*sizeof(long))) {
ret = -EIO;
break;
}
for ( i = 0; i < FRAME_SIZE*sizeof(long); i += sizeof(long) ) {
__put_user(getreg(child, i),(unsigned long *) data);
data += sizeof(long);
}
ret = 0;
break;
}
case PTRACE_SETREGS: { /* Set all gp regs in the child. */
unsigned long tmp;
if (!access_ok(VERIFY_READ, (unsigned *)data, FRAME_SIZE*sizeof(long))) {
ret = -EIO;
break;
}
for ( i = 0; i < FRAME_SIZE*sizeof(long); i += sizeof(long) ) {
__get_user(tmp, (unsigned long *) data);
putreg(child, i, tmp);
data += sizeof(long);
}
ret = 0;
break;
}
case PTRACE_GETFPREGS: { /* Get the child FPU state. */
if (!access_ok(VERIFY_WRITE, (unsigned *)data,
sizeof(struct user_i387_struct))) {
ret = -EIO;
break;
}
ret = 0;
if (!child->used_math)
init_fpu(child);
get_fpregs((struct user_i387_struct __user *)data, child);
break;
}
case PTRACE_SETFPREGS: { /* Set the child FPU state. */
if (!access_ok(VERIFY_READ, (unsigned *)data,
sizeof(struct user_i387_struct))) {
ret = -EIO;
break;
}
child->used_math = 1;
set_fpregs(child, (struct user_i387_struct __user *)data);
ret = 0;
break;
}
case PTRACE_GETFPXREGS: { /* Get the child extended FPU state. */
if (!access_ok(VERIFY_WRITE, (unsigned *)data,
sizeof(struct user_fxsr_struct))) {
ret = -EIO;
break;
}
if (!child->used_math)
init_fpu(child);
ret = get_fpxregs((struct user_fxsr_struct __user *)data, child);
break;
}
case PTRACE_SETFPXREGS: { /* Set the child extended FPU state. */
if (!access_ok(VERIFY_READ, (unsigned *)data,
sizeof(struct user_fxsr_struct))) {
ret = -EIO;
break;
}
child->used_math = 1;
ret = set_fpxregs(child, (struct user_fxsr_struct __user *)data);
break;
}
case PTRACE_GET_THREAD_AREA:
ret = ptrace_get_thread_area(child,
addr, (struct user_desc __user *) data);
break;
case PTRACE_SET_THREAD_AREA:
ret = ptrace_set_thread_area(child,
addr, (struct user_desc __user *) data);
break;
default:
ret = ptrace_request(child, request, addr, data);
break;
}
out_tsk:
put_task_struct(child);
out:
unlock_kernel();
return ret;
}
/*
* linux/kernel/ptrace.c
*
* (C) Copyright 1999 Linus Torvalds
*
* Common interfaces for "ptrace()" which we do not want
* to continually duplicate across every architecture.
*/
... ...
/*
* ptrace a task: make the debugger its new parent and
* move it to the ptrace list.
*
* Must be called with the tasklist lock write-held.
*/
void __ptrace_link(task_t *child, task_t *new_parent)
{
... ...
}
/*
* unptrace a task: move it back to its original parent and
* remove it from the ptrace list.
*
* Must be called with the tasklist lock write-held.
*/
void __ptrace_unlink(task_t *child)
{
... ...
}
/*
* Check that we have indeed attached to the thing..
*/
int ptrace_check_attach(struct task_struct *child, int kill)
{
if (!(child->ptrace & PT_PTRACED))
return -ESRCH;
if (child->parent != current)
return -ESRCH;
if (!kill) {
if (child->state != TASK_STOPPED)
return -ESRCH;
wait_task_inactive(child);
}
/* All systems go.. */
return 0;
}
int ptrace_attach(struct task_struct *task)
{
... ...
}
int ptrace_detach(struct task_struct *child, unsigned int data)
{
... ...
}
/*
* Access another process' address space.
* Source/target buffer must be kernel space,
* Do not walk the page table directly, use get_user_pages
*/
int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
{
... ...
}
int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst, int len)
{
... ...
}
int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long dst, int len)
{
... ...
}
static int ptrace_setoptions(struct task_struct *child, long data)
{
... ...
}
static int ptrace_getsiginfo(struct task_struct *child, siginfo_t __user * data)
{
... ...
}
static int ptrace_setsiginfo(struct task_struct *child, siginfo_t __user * data)
{
... ...
}
int ptrace_request(struct task_struct *child, long request,
long addr, long data)
{
int ret = -EIO;
switch (request) {
#ifdef PTRACE_OLDSETOPTIONS
case PTRACE_OLDSETOPTIONS:
#endif
case PTRACE_SETOPTIONS:
ret = ptrace_setoptions(child, data);
break;
case PTRACE_GETEVENTMSG:
ret = put_user(child->ptrace_message, (unsigned long __user *) data);
break;
case PTRACE_GETSIGINFO:
ret = ptrace_getsiginfo(child, (siginfo_t __user *) data);
break;
case PTRACE_SETSIGINFO:
ret = ptrace_setsiginfo(child, (siginfo_t __user *) data);
break;
default:
break;
}
return ret;
}
void ptrace_notify(int exit_code)
{
BUG_ON (!(current->ptrace & PT_PTRACED));
/* Let the debugger run. */
current->exit_code = exit_code;
set_current_state(TASK_STOPPED);
notify_parent(current, SIGCHLD);
schedule();
/*
* Signals sent while we were stopped might set TIF_SIGPENDING.
*/
spin_lock_irq(¤t->sighand->siglock);
recalc_sigpending();
spin_unlock_irq(¤t->sighand->siglock);
}
EXPORT_SYMBOL(ptrace_notify);
SYSCALL_DEFINE4(ptrace, long, request, long, pid, unsigned long, addr,
unsigned long, data)
{
struct task_struct *child;
long ret;
if (request == PTRACE_TRACEME) { // 请求是否为PTRACE_TRACEME
ret = ptrace_traceme();
if (!ret)
arch_ptrace_attach(current);
goto out;
}
child = find_get_task_by_vpid(pid); // 通过pid请求task结构
if (!child) { // 请求失败,返回ESRCH
ret = -ESRCH;
goto out;
}
if (request == PTRACE_ATTACH || request == PTRACE_SEIZE) {
ret = ptrace_attach(child, request, addr, data);
/*
* Some architectures need to do book-keeping after
* a ptrace attach.
*/
if (!ret)
arch_ptrace_attach(child);
goto out_put_task_struct;
}
ret = ptrace_check_attach(child, request == PTRACE_KILL ||
request == PTRACE_INTERRUPT);
if (ret < 0)
goto out_put_task_struct;
/* 根据不同的架构进行不同的处理 */
ret = arch_ptrace(child, request, addr, data);
if (ret || request != PTRACE_DETACH)
ptrace_unfreeze_traced(child);
out_put_task_struct:
put_task_struct(child);
out:
return ret;
}
四、Request参数详解
/**
* ptrace_traceme -- helper for PTRACE_TRACEME
*
* Performs checks and sets PT_PTRACED.
* Should be used by all ptrace implementations for PTRACE_TRACEME.
*/
static int ptrace_traceme(void)
{
int ret = -EPERM;
write_lock_irq(&tasklist_lock); // 首先让writer拿到读写lock,并且会disable local irp
/* Are we already being traced? */
// 是否已经处于ptrace中
if (!current->ptrace) {
ret = security_ptrace_traceme(current->parent);
/*
* Check PF_EXITING to ensure ->real_parent has not passed
* exit_ptrace(). Otherwise we don't report the error but
* pretend ->real_parent untraces us right after return.
*/
if (!ret && !(current->real_parent->flags & PF_EXITING)) {
// 检查通过,将子进程链接到父进程的ptrace链表中
current->ptrace = PT_PTRACED;
ptrace_link(current, current->real_parent);
}
}
write_unlock_irq(&tasklist_lock);
return ret;
}
/**
* ptrace_event - possibly stop for a ptrace event notification
* @event: %PTRACE_EVENT_* value to report
* @message: value for %PTRACE_GETEVENTMSG to return
*
* Check whether @event is enabled and, if so, report @event and @message
* to the ptrace parent.
*
* Called without locks.
*/
static inline void ptrace_event(int event, unsigned long message)
{
if (unlikely(ptrace_event_enabled(current, event))) {
current->ptrace_message = message;
ptrace_notify((event << 8) | SIGTRAP);
} else if (event == PTRACE_EVENT_EXEC) {
/* legacy EXEC report via SIGTRAP */
if ((current->ptrace & (PT_PTRACED|PT_SEIZED)) == PT_PTRACED)
send_sig(SIGTRAP, current, 0);
}
}
static int exec_binprm(struct linux_binprm *bprm)
{
pid_t old_pid, old_vpid;
int ret, depth;
/* Need to fetch pid before load_binary changes it */
old_pid = current->pid;
rcu_read_lock();
old_vpid = task_pid_nr_ns(current, task_active_pid_ns(current->parent));
rcu_read_unlock();
.......
audit_bprm(bprm);
trace_sched_process_exec(current, old_pid, bprm);
// 调用ptrace_event,传入的event为PTRACE_EVENT_EXEC
// 直接走发送SIGTRAP的逻辑
ptrace_event(PTRACE_EVENT_EXEC, old_vpid);
proc_exec_connector(current);
return 0;
}
SIGTRAP信号的值为5,专门为调试设计。当kernel发生int 3时,触发回掉函数do_trap(),其代码如下:
asmlinkage void do_trap(struct pt_regs *regs, unsigned long address)
{
force_sig_fault(SIGTRAP, TRAP_TRACE, (void __user *)address);
regs->pc += 4;
}
int force_sig_fault(int sig, int code, void __user *addr
___ARCH_SI_TRAPNO(int trapno)
___ARCH_SI_IA64(int imm, unsigned int flags, unsigned long isr))
{
return force_sig_fault_to_task(sig, code, addr
___ARCH_SI_TRAPNO(trapno)
___ARCH_SI_IA64(imm, flags, isr), current);
}
static int ptrace_attach(struct task_struct *task, long request,
unsigned long addr,
unsigned long flags)
{
bool seize = (request == PTRACE_SEIZE);
int retval;
retval = -EIO; /* I/O error*/
/*
* 判断request是PTRACE_SEIZE还是PTRACE_ATTACH。
* 如果request为PTRACE_SEIZE,则进行必要的参数检查,错误时退出。
*/
if (seize) {
if (addr != 0)
goto out;
if (flags & ~(unsigned long)PTRACE_O_MASK)
goto out;
flags = PT_PTRACED | PT_SEIZED | (flags << PT_OPT_FLAG_SHIFT);
} else {
flags = PT_PTRACED;
}
audit_ptrace(task);
/*
* 判断task进程是否为kernel thread(PF_KTHREAD),
* 调用same_thread_group(task, current),判断task是否和current进程在同一个线程组,查看current进程是否有权限trace task进程。
* 如果不符合要求,则直接退出。
*/
retval = -EPERM; /* Operation not permitted, retval = -1 */
if (unlikely(task->flags & PF_KTHREAD))
goto out;
if (same_thread_group(task, current))
goto out;
/*
* Protect exec's credential calculations against our interference;
* SUID, SGID and LSM creds get determined differently
* under ptrace.
*/
retval = -ERESTARTNOINTR;
if (mutex_lock_interruptible(&task->signal->cred_guard_mutex))
goto out;
task_lock(task);
retval = __ptrace_may_access(task, PTRACE_MODE_ATTACH_REALCREDS);
task_unlock(task);
if (retval)
goto unlock_creds;
write_lock_irq(&tasklist_lock);
retval = -EPERM;
if (unlikely(task->exit_state))
goto unlock_tasklist;
if (task->ptrace)
goto unlock_tasklist;
/*
* 设置子进程task->ptrace = PT_TRACED,被跟踪状态
*/
if (seize)
flags |= PT_SEIZED;
task->ptrace = flags;
/*
* 调用__ptrace_link(task, current),将task->ptrace_entry链接到current->ptraced链表中。
*/
ptrace_link(task, current);
/* SEIZE doesn't trap tracee on attach */
/*
* 如果是PTRACE_ATTACH请求(PTRACE_SEIZE请求不会停止被跟踪进程),
* 则调用send_sig_info(SIGSTOP,SEND_SIG_PRIV, task);
* 发送SIGSTOP信号,中止task运行,设置task->state为TASK_STOPPED
*/
if (!seize)
send_sig_info(SIGSTOP, SEND_SIG_PRIV, task);
spin_lock(&task->sighand->siglock);
/*
* If the task is already STOPPED, set JOBCTL_TRAP_STOP and
* TRAPPING, and kick it so that it transits to TRACED. TRAPPING
* will be cleared if the child completes the transition or any
* event which clears the group stop states happens. We'll wait
* for the transition to complete before returning from this
* function.
*
* This hides STOPPED -> RUNNING -> TRACED transition from the
* attaching thread but a different thread in the same group can
* still observe the transient RUNNING state. IOW, if another
* thread's WNOHANG wait(2) on the stopped tracee races against
* ATTACH, the wait(2) may fail due to the transient RUNNING.
*
* The following task_is_stopped() test is safe as both transitions
* in and out of STOPPED are protected by siglock.
*
*
*
* 等待task->jobctl的JOBCTL_TRAPPING_BIT位被清零,
* 阻塞时进程状态被设置为TASK_UNINTERRUPTIBLE,引发进程调度
*/
if (task_is_stopped(task) &&
task_set_jobctl_pending(task, JOBCTL_TRAP_STOP | JOBCTL_TRAPPING))
signal_wake_up_state(task, __TASK_STOPPED);
spin_unlock(&task->sighand->siglock);
retval = 0;
unlock_tasklist:
write_unlock_irq(&tasklist_lock);
unlock_creds:
mutex_unlock(&task->signal->cred_guard_mutex);
out:
if (!retval) {
/*
* We do not bother to change retval or clear JOBCTL_TRAPPING
* if wait_on_bit() was interrupted by SIGKILL. The tracer will
* not return to user-mode, it will exit and clear this bit in
* __ptrace_unlink() if it wasn't already cleared by the tracee;
* and until then nobody can ptrace this task.
*/
wait_on_bit(&task->jobctl, JOBCTL_TRAPPING_BIT, TASK_KILLABLE);
proc_ptrace_connector(task, PTRACE_ATTACH);
}
return retval;
}
int ptrace_attach(struct task_struct *task)
{
int retval;
task_lock(task);
retval = -EPERM;
if (task->pid <= 1) // 不能调试init
goto bad;
if (task == current) // 不能调试自身
goto bad;
if (!task->mm)
goto bad;
/* 鉴权 */
if(((current->uid != task->euid) ||
(current->uid != task->suid) ||
(current->uid != task->uid) ||
(current->gid != task->egid) ||
(current->gid != task->sgid) ||
(current->gid != task->gid)) && !capable(CAP_SYS_PTRACE))
goto bad;
rmb();
if (!task->mm->dumpable && !capable(CAP_SYS_PTRACE))
goto bad;
if (task->ptrace & PT_PTRACED) // 一个进程不能被attach多次
goto bad;
retval = security_ptrace(current, task);
if (retval)
goto bad;
/* Go */
task->ptrace |= PT_PTRACED;
if (capable(CAP_SYS_PTRACE))
task->ptrace |= PT_PTRACE_CAP;
task_unlock(task);
write_lock_irq(&tasklist_lock);
__ptrace_link(task, current); // 调用__ptrace_link(task, current),将task->ptrace_entry链接到current->ptraced链表中
write_unlock_irq(&tasklist_lock);
force_sig_specific(SIGSTOP, task); // 发送SIGSTOP,终止运行
return 0;
bad:
task_unlock(task);
return retval;
}
看雪ID:有毒
https://bbs.pediy.com/user-home-7797730.htm
# 往期推荐
球分享
球点赞
球在看
点击“阅读原文”,了解更多!