首先参考轮子库Linux的实现,Linux实现的主干逻辑如下:
Xtensa架构有其特殊性,最核心的一点是所谓的window ABI:通过window ABI减少了函数调用过程中的压栈、出栈操作,从而获得了性能上的提升。不知道是不是由于Window ABI引入的代价,它的堆栈布局非常规整,有板有眼。这给stack hacking带来了很多便利。从上图中可以看出,Linux中针对xtensa架构的dump_stack实现还是非常简洁的。
移植结果:
#include <stddef.h>
#include <stdio.h>
/* Round x up to the next multiple of a. The mask trick requires a to be a power of two. */
#define ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1))
/* Stack size in bytes (8 KiB); walk_stackframe() rounds sp up to this to find where to stop. */
#define THREAD_SIZE (8*1024)
/*
 * Read word `reg` (0..3) of the four-word save area that sits directly
 * below `sp`. walk_stackframe() reads slot 0 as the saved a0 (return
 * address) and slot 1 as the saved a1 (stack pointer).
 */
#define SPILL_SLOT(sp, reg) (*(((unsigned long *)(sp)) - 4 + (reg)))
/*
 * Build a program counter from a saved return address: keep the low 30
 * bits of `ra` and take the top two bits from `sp`.
 * NOTE(review): on the windowed ABI the top two bits of a0 are understood
 * to carry the call window size rather than address bits - confirm against
 * the Xtensa ISA manual.
 */
#define MAKE_PC_FROM_RA(ra,sp) (((ra) & 0x3fffffff) | ((sp) & 0xc0000000))
/*
 * Several C libraries already provide __always_inline in <sys/cdefs.h>;
 * only define it when the toolchain has not, to avoid a macro redefinition.
 */
#ifndef __always_inline
#define __always_inline inline __attribute__((__always_inline__))
#endif
/*
 * Two-level stringification so that macro arguments are expanded before
 * being turned into a string literal. spill_registers() uses this to paste
 * XCHAL_NUM_AREGS into its assembly text.
 */
#ifndef __stringify
#define __stringify_1(...) #__VA_ARGS__
#define __stringify(...) __stringify_1(__VA_ARGS__)
#endif
/* One unwound call frame, as handed to the walk_stackframe() callback. */
struct stackframe {
/* Program counter rebuilt from the saved return address (see MAKE_PC_FROM_RA). */
unsigned long pc;
/* Stack pointer value loaded from the save area for this frame. */
unsigned long sp;
};
/*
 * Return the current contents of register a1 as a pointer.
 *
 * The function is forced inline, so the value read is the a1 of whichever
 * function this call is expanded into.
 */
static __always_inline unsigned long *stack_pointer(void)
{
    unsigned long *current_a1;

    /* Copy a1 into a compiler-chosen address register. */
    __asm__ __volatile__ ("mov %0, a1\n" : "=a"(current_a1));

    return current_a1;
}
/*
 * Run a short assembly sequence whose shape depends on XCHAL_NUM_AREGS.
 *
 * walk_stackframe() calls this immediately before it starts reading the
 * save areas below each stack pointer, so the sequence is presumably meant
 * to force every live register window out to the stack first.
 * NOTE(review): confirm the exact window-overflow behaviour against the
 * Xtensa ISA manual before modifying any instruction here.
 *
 * NOTE(review): XCHAL_NUM_AREGS is not defined in the visible part of this
 * file. If it is undefined at this point the preprocessor evaluates it as 0
 * and only the single `mov a12, a12` variant below is emitted - verify that
 * the Xtensa core configuration header is included by the build.
 */
static inline void spill_registers(void)
{
#if XCHAL_NUM_AREGS > 16
__asm__ __volatile__ (
/* Call the local routine at label 1; on return, jump to label 2 at the end. */
"call8 1f\n"
"_j 2f\n"
"retw\n"
".align 4\n"
"1:\n"
#if XCHAL_NUM_AREGS == 32
/* 32 address registers: fixed sequence of two entry instructions. */
"_entry a1, 32\n"
"addi a8, a0, 3\n"
"_entry a1, 16\n"
"mov a12, a12\n"
"retw\n"
#else
/* Other register-file sizes: repeat entry (XCHAL_NUM_AREGS - 16) / 12 times. */
"_entry a1, 48\n"
"call12 1f\n"
"retw\n"
".align 4\n"
"1:\n"
".rept (" __stringify(XCHAL_NUM_AREGS) " - 16) / 12\n"
"_entry a1, 48\n"
"mov a12, a0\n"
".endr\n"
"_entry a1, 16\n"
/* The register touched last depends on the remainder of XCHAL_NUM_AREGS / 12. */
#if XCHAL_NUM_AREGS % 12 == 0
"mov a12, a12\n"
#elif XCHAL_NUM_AREGS % 12 == 4
"mov a4, a4\n"
#elif XCHAL_NUM_AREGS % 12 == 8
"mov a8, a8\n"
#endif
"retw\n"
#endif
"2:\n"
/* Declares a8, a9 and memory as clobbered so the compiler does not cache values across the asm. */
: : : "a8", "a9", "memory");
#else
/* XCHAL_NUM_AREGS <= 16 (or undefined): a single register move plus a memory clobber. */
__asm__ __volatile__ (
"mov a12, a12\n"
: : : "memory");
#endif
}
/*
 * Number of walk_stackframe() invocations currently in progress.
 * Plain int: not protected against concurrent access from other
 * threads or interrupt handlers.
 */
static int recursive_counter = 0;

/*
 * Walk the call chain starting at `sp` and report each frame to `fn`.
 *
 * sp   - stack pointer to start from.
 * fn   - callback invoked once per frame with a filled-in struct stackframe
 *        and `data`; returning non-zero stops the walk.
 * data - opaque pointer forwarded unchanged to `fn`.
 *
 * The callback may itself end up back in this function (the original note
 * mentions printf taking a lock that triggers another stack dump), so
 * nesting is capped at 20 levels. The depth counter is released on every
 * exit path; previously it was only ever incremented, which permanently
 * disabled stack walking after 20 calls in total.
 * NOTE(review): if every callback print re-enters, output grows with the
 * nesting depth - consider lowering the limit for such configurations.
 */
void walk_stackframe(unsigned long *sp, int (*fn)(struct stackframe *frame, void *data), void *data)
{
    unsigned long a0, a1;
    unsigned long sp_end;

    recursive_counter++;
    if (recursive_counter > 20)
    {
        /* Too deeply nested: refuse this request but release our slot. */
        recursive_counter--;
        return;
    }

    a1 = (unsigned long)sp;
    /*
     * Stop at the next THREAD_SIZE boundary above the starting sp.
     * NOTE(review): this assumes the stack ends on a THREAD_SIZE-aligned
     * address - verify for the target's stack allocation scheme.
     */
    sp_end = ALIGN(a1, THREAD_SIZE);

    /* Must run before any save area is read from memory. */
    spill_registers();

    while (a1 < sp_end)
    {
        struct stackframe frame;

        sp = (unsigned long *)a1;

        /* Saved return address and stack pointer sit just below the current sp. */
        a0 = SPILL_SLOT(a1, 0);
        a1 = SPILL_SLOT(a1, 1);

        /* The next sp must be strictly higher, otherwise the chain is broken or loops. */
        if (a1 <= (unsigned long)sp)
        {
            break;
        }

        frame.pc = MAKE_PC_FROM_RA(a0, a1);
        frame.sp = a1;

        /* Non-zero from the callback requests an early stop. */
        if (fn(&frame, data))
        {
            break;
        }
    }

    recursive_counter--;
}
/*
 * walk_stackframe() callback: print one frame's pc and sp, together with
 * this function's name, the source line of the print and the `data` pointer.
 *
 * Always returns 0, so the walk is never cut short by this callback.
 */
static int show_trace_cb(struct stackframe *frame, void *data)
{
    const unsigned long frame_pc = frame->pc;
    const unsigned long frame_sp = frame->sp;

    printf("%s line %d, pc 0x%lx, sp 0x%lx, data=%p.\n", __func__, __LINE__, frame_pc, frame_sp, data);

    return 0;
}
/*
 * Print a backtrace for the stack starting at `sp`.
 *
 * pc - accepted for interface compatibility; not used.
 * sp - stack pointer to begin unwinding from.
 */
void show_trace(unsigned long pc, unsigned long *sp)
{
    (void)pc;

    /* No per-walk context is needed, so the callback data is NULL. */
    walk_stackframe(sp, show_trace_cb, NULL);
}
/*
 * Show the stack beginning at `sp`; currently this just forwards both
 * arguments to show_trace().
 */
void show_stack(unsigned long pc, unsigned long *sp)
{
    show_trace(pc, sp);
}
/*
 * Capture the current stack pointer and show the stack from there.
 * The pc argument is passed as 0.
 */
static void __dump_stack(void)
{
    show_stack(0, stack_pointer());
}
/*
 * Public entry point: print a backtrace of the calling context.
 * Delegates all work to __dump_stack().
 */
void dump_stack(void)
{
__dump_stack();
}