opt-arm.c (10265B)
1// SPDX-License-Identifier: GPL-2.0-or-later 2/* 3 * Kernel Probes Jump Optimization (Optprobes) 4 * 5 * Copyright (C) IBM Corporation, 2002, 2004 6 * Copyright (C) Hitachi Ltd., 2012 7 * Copyright (C) Huawei Inc., 2014 8 */ 9 10#include <linux/kprobes.h> 11#include <linux/jump_label.h> 12#include <asm/kprobes.h> 13#include <asm/cacheflush.h> 14/* for arm_gen_branch */ 15#include <asm/insn.h> 16/* for patch_text */ 17#include <asm/patch.h> 18 19#include "core.h" 20 21/* 22 * See register_usage_flags. If the probed instruction doesn't use PC, 23 * we can copy it into template and have it executed directly without 24 * simulation or emulation. 25 */ 26#define ARM_REG_PC 15 27#define can_kprobe_direct_exec(m) (!test_bit(ARM_REG_PC, &(m))) 28 29/* 30 * NOTE: the first sub and add instruction will be modified according 31 * to the stack cost of the instruction. 32 */ 33asm ( 34 ".global optprobe_template_entry\n" 35 "optprobe_template_entry:\n" 36 ".global optprobe_template_sub_sp\n" 37 "optprobe_template_sub_sp:" 38 " sub sp, sp, #0xff\n" 39 " stmia sp, {r0 - r14} \n" 40 ".global optprobe_template_add_sp\n" 41 "optprobe_template_add_sp:" 42 " add r3, sp, #0xff\n" 43 " str r3, [sp, #52]\n" 44 " mrs r4, cpsr\n" 45 " str r4, [sp, #64]\n" 46 " mov r1, sp\n" 47 " ldr r0, 1f\n" 48 " ldr r2, 2f\n" 49 /* 50 * AEABI requires an 8-bytes alignment stack. If 51 * SP % 8 != 0 (SP % 4 == 0 should be ensured), 52 * alloc more bytes here. 53 */ 54 " and r4, sp, #4\n" 55 " sub sp, sp, r4\n" 56#if __LINUX_ARM_ARCH__ >= 5 57 " blx r2\n" 58#else 59 " mov lr, pc\n" 60 " mov pc, r2\n" 61#endif 62 " add sp, sp, r4\n" 63 " ldr r1, [sp, #64]\n" 64 " tst r1, #"__stringify(PSR_T_BIT)"\n" 65 " ldrne r2, [sp, #60]\n" 66 " orrne r2, #1\n" 67 " strne r2, [sp, #60] @ set bit0 of PC for thumb\n" 68 " msr cpsr_cxsf, r1\n" 69 ".global optprobe_template_restore_begin\n" 70 "optprobe_template_restore_begin:\n" 71 " ldmia sp, {r0 - r15}\n" 72 ".global optprobe_template_restore_orig_insn\n" 73 "optprobe_template_restore_orig_insn:\n" 74 " nop\n" 75 ".global optprobe_template_restore_end\n" 76 "optprobe_template_restore_end:\n" 77 " nop\n" 78 ".global optprobe_template_val\n" 79 "optprobe_template_val:\n" 80 "1: .long 0\n" 81 ".global optprobe_template_call\n" 82 "optprobe_template_call:\n" 83 "2: .long 0\n" 84 ".global optprobe_template_end\n" 85 "optprobe_template_end:\n"); 86 87#define TMPL_VAL_IDX \ 88 ((unsigned long *)optprobe_template_val - (unsigned long *)optprobe_template_entry) 89#define TMPL_CALL_IDX \ 90 ((unsigned long *)optprobe_template_call - (unsigned long *)optprobe_template_entry) 91#define TMPL_END_IDX \ 92 ((unsigned long *)optprobe_template_end - (unsigned long *)optprobe_template_entry) 93#define TMPL_ADD_SP \ 94 ((unsigned long *)optprobe_template_add_sp - (unsigned long *)optprobe_template_entry) 95#define TMPL_SUB_SP \ 96 ((unsigned long *)optprobe_template_sub_sp - (unsigned long *)optprobe_template_entry) 97#define TMPL_RESTORE_BEGIN \ 98 ((unsigned long *)optprobe_template_restore_begin - (unsigned long *)optprobe_template_entry) 99#define TMPL_RESTORE_ORIGN_INSN \ 100 ((unsigned long *)optprobe_template_restore_orig_insn - (unsigned long *)optprobe_template_entry) 101#define TMPL_RESTORE_END \ 102 ((unsigned long *)optprobe_template_restore_end - (unsigned long *)optprobe_template_entry) 103 104/* 105 * ARM can always optimize an instruction when using ARM ISA, except 106 * instructions like 'str r0, [sp, r1]' which store to stack and unable 107 * to determine stack space consumption statically. 108 */ 109int arch_prepared_optinsn(struct arch_optimized_insn *optinsn) 110{ 111 return optinsn->insn != NULL; 112} 113 114/* 115 * In ARM ISA, kprobe opt always replace one instruction (4 bytes 116 * aligned and 4 bytes long). It is impossible to encounter another 117 * kprobe in the address range. So always return 0. 118 */ 119int arch_check_optimized_kprobe(struct optimized_kprobe *op) 120{ 121 return 0; 122} 123 124/* Caller must ensure addr & 3 == 0 */ 125static int can_optimize(struct kprobe *kp) 126{ 127 if (kp->ainsn.stack_space < 0) 128 return 0; 129 /* 130 * 255 is the biggest imm can be used in 'sub r0, r0, #<imm>'. 131 * Number larger than 255 needs special encoding. 132 */ 133 if (kp->ainsn.stack_space > 255 - sizeof(struct pt_regs)) 134 return 0; 135 return 1; 136} 137 138/* Free optimized instruction slot */ 139static void 140__arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty) 141{ 142 if (op->optinsn.insn) { 143 free_optinsn_slot(op->optinsn.insn, dirty); 144 op->optinsn.insn = NULL; 145 } 146} 147 148extern void kprobe_handler(struct pt_regs *regs); 149 150static void 151optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs) 152{ 153 unsigned long flags; 154 struct kprobe *p = &op->kp; 155 struct kprobe_ctlblk *kcb; 156 157 /* Save skipped registers */ 158 regs->ARM_pc = (unsigned long)op->kp.addr; 159 regs->ARM_ORIG_r0 = ~0UL; 160 161 local_irq_save(flags); 162 kcb = get_kprobe_ctlblk(); 163 164 if (kprobe_running()) { 165 kprobes_inc_nmissed_count(&op->kp); 166 } else { 167 __this_cpu_write(current_kprobe, &op->kp); 168 kcb->kprobe_status = KPROBE_HIT_ACTIVE; 169 opt_pre_handler(&op->kp, regs); 170 __this_cpu_write(current_kprobe, NULL); 171 } 172 173 /* 174 * We singlestep the replaced instruction only when it can't be 175 * executed directly during restore. 176 */ 177 if (!p->ainsn.kprobe_direct_exec) 178 op->kp.ainsn.insn_singlestep(p->opcode, &p->ainsn, regs); 179 180 local_irq_restore(flags); 181} 182NOKPROBE_SYMBOL(optimized_callback) 183 184int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *orig) 185{ 186 kprobe_opcode_t *code; 187 unsigned long rel_chk; 188 unsigned long val; 189 unsigned long stack_protect = sizeof(struct pt_regs); 190 191 if (!can_optimize(orig)) 192 return -EILSEQ; 193 194 code = get_optinsn_slot(); 195 if (!code) 196 return -ENOMEM; 197 198 /* 199 * Verify if the address gap is in 32MiB range, because this uses 200 * a relative jump. 201 * 202 * kprobe opt use a 'b' instruction to branch to optinsn.insn. 203 * According to ARM manual, branch instruction is: 204 * 205 * 31 28 27 24 23 0 206 * +------+---+---+---+---+----------------+ 207 * | cond | 1 | 0 | 1 | 0 | imm24 | 208 * +------+---+---+---+---+----------------+ 209 * 210 * imm24 is a signed 24 bits integer. The real branch offset is computed 211 * by: imm32 = SignExtend(imm24:'00', 32); 212 * 213 * So the maximum forward branch should be: 214 * (0x007fffff << 2) = 0x01fffffc = 0x1fffffc 215 * The maximum backword branch should be: 216 * (0xff800000 << 2) = 0xfe000000 = -0x2000000 217 * 218 * We can simply check (rel & 0xfe000003): 219 * if rel is positive, (rel & 0xfe000000) shoule be 0 220 * if rel is negitive, (rel & 0xfe000000) should be 0xfe000000 221 * the last '3' is used for alignment checking. 222 */ 223 rel_chk = (unsigned long)((long)code - 224 (long)orig->addr + 8) & 0xfe000003; 225 226 if ((rel_chk != 0) && (rel_chk != 0xfe000000)) { 227 /* 228 * Different from x86, we free code buf directly instead of 229 * calling __arch_remove_optimized_kprobe() because 230 * we have not fill any field in op. 231 */ 232 free_optinsn_slot(code, 0); 233 return -ERANGE; 234 } 235 236 /* Copy arch-dep-instance from template. */ 237 memcpy(code, (unsigned long *)optprobe_template_entry, 238 TMPL_END_IDX * sizeof(kprobe_opcode_t)); 239 240 /* Adjust buffer according to instruction. */ 241 BUG_ON(orig->ainsn.stack_space < 0); 242 243 stack_protect += orig->ainsn.stack_space; 244 245 /* Should have been filtered by can_optimize(). */ 246 BUG_ON(stack_protect > 255); 247 248 /* Create a 'sub sp, sp, #<stack_protect>' */ 249 code[TMPL_SUB_SP] = __opcode_to_mem_arm(0xe24dd000 | stack_protect); 250 /* Create a 'add r3, sp, #<stack_protect>' */ 251 code[TMPL_ADD_SP] = __opcode_to_mem_arm(0xe28d3000 | stack_protect); 252 253 /* Set probe information */ 254 val = (unsigned long)op; 255 code[TMPL_VAL_IDX] = val; 256 257 /* Set probe function call */ 258 val = (unsigned long)optimized_callback; 259 code[TMPL_CALL_IDX] = val; 260 261 /* If possible, copy insn and have it executed during restore */ 262 orig->ainsn.kprobe_direct_exec = false; 263 if (can_kprobe_direct_exec(orig->ainsn.register_usage_flags)) { 264 kprobe_opcode_t final_branch = arm_gen_branch( 265 (unsigned long)(&code[TMPL_RESTORE_END]), 266 (unsigned long)(op->kp.addr) + 4); 267 if (final_branch != 0) { 268 /* 269 * Replace original 'ldmia sp, {r0 - r15}' with 270 * 'ldmia {r0 - r14}', restore all registers except pc. 271 */ 272 code[TMPL_RESTORE_BEGIN] = __opcode_to_mem_arm(0xe89d7fff); 273 274 /* The original probed instruction */ 275 code[TMPL_RESTORE_ORIGN_INSN] = __opcode_to_mem_arm(orig->opcode); 276 277 /* Jump back to next instruction */ 278 code[TMPL_RESTORE_END] = __opcode_to_mem_arm(final_branch); 279 orig->ainsn.kprobe_direct_exec = true; 280 } 281 } 282 283 flush_icache_range((unsigned long)code, 284 (unsigned long)(&code[TMPL_END_IDX])); 285 286 /* Set op->optinsn.insn means prepared. */ 287 op->optinsn.insn = code; 288 return 0; 289} 290 291void __kprobes arch_optimize_kprobes(struct list_head *oplist) 292{ 293 struct optimized_kprobe *op, *tmp; 294 295 list_for_each_entry_safe(op, tmp, oplist, list) { 296 unsigned long insn; 297 WARN_ON(kprobe_disabled(&op->kp)); 298 299 /* 300 * Backup instructions which will be replaced 301 * by jump address 302 */ 303 memcpy(op->optinsn.copied_insn, op->kp.addr, 304 RELATIVEJUMP_SIZE); 305 306 insn = arm_gen_branch((unsigned long)op->kp.addr, 307 (unsigned long)op->optinsn.insn); 308 BUG_ON(insn == 0); 309 310 /* 311 * Make it a conditional branch if replaced insn 312 * is consitional 313 */ 314 insn = (__mem_to_opcode_arm( 315 op->optinsn.copied_insn[0]) & 0xf0000000) | 316 (insn & 0x0fffffff); 317 318 /* 319 * Similar to __arch_disarm_kprobe, operations which 320 * removing breakpoints must be wrapped by stop_machine 321 * to avoid racing. 322 */ 323 kprobes_remove_breakpoint(op->kp.addr, insn); 324 325 list_del_init(&op->list); 326 } 327} 328 329void arch_unoptimize_kprobe(struct optimized_kprobe *op) 330{ 331 arch_arm_kprobe(&op->kp); 332} 333 334/* 335 * Recover original instructions and breakpoints from relative jumps. 336 * Caller must call with locking kprobe_mutex. 337 */ 338void arch_unoptimize_kprobes(struct list_head *oplist, 339 struct list_head *done_list) 340{ 341 struct optimized_kprobe *op, *tmp; 342 343 list_for_each_entry_safe(op, tmp, oplist, list) { 344 arch_unoptimize_kprobe(op); 345 list_move(&op->list, done_list); 346 } 347} 348 349int arch_within_optimized_kprobe(struct optimized_kprobe *op, 350 kprobe_opcode_t *addr) 351{ 352 return (op->kp.addr <= addr && 353 op->kp.addr + (RELATIVEJUMP_SIZE / sizeof(kprobe_opcode_t)) > addr); 354 355} 356 357void arch_remove_optimized_kprobe(struct optimized_kprobe *op) 358{ 359 __arch_remove_optimized_kprobe(op, 1); 360}