0
点赞
收藏
分享

微信扫一扫

AFL源码分析(学习笔记)

认真的老去 2022-03-24 阅读 77
linux

前言

本文主要是个人学习做笔记,主要参考大佬的文章


一、 函数解析

1.strrchr() 函数

语法如下(示例):

strrchr(a,b)
a:是被搜索的字符串
b:是要查找的字符(如果传入的是数字,则查找ASCII值等于该数字的字符)
返回值:返回指向该字符在字符串中最后一次出现位置的指针(即从该位置到字符串结尾的子串);如果没有找到该字符,则返回NULL

2、strncmp()函数

语法如下(示例):

str1 – 要进行比较的第一个字符串。
str2 – 要进行比较的第二个字符串。
n – 要比较的最大字符数

按照字符的ASCII值逐个进行比较(最多比较n个字符),返回值的符号反映第一个不相同字符的大小关系;具体数值由实现决定,不保证等于两个字符之差。
如果返回值 < 0,则表示 str1 小于 str2。
如果返回值 > 0,则表示 str2 小于 str1。
如果返回值 = 0,则表示 str1 等于 str2

3、fopen()函数

语法如下(示例):

type              含义
────────────────────────────
"r"           打开文字文件只读
"w"           创建文字文件只写
"a"           增补, 如果文件不存在则创建一个
"r+"          打开一个文字文件读/写
"w+"          创建一个文字文件读/写
"a+"          打开或创建一个文件增补
"b"           二进制文件(可以和上面每一项合用)
"t"           文本文件(默认项)

如果要打开C盘TABLE子目录中, 文件名为HELLO的二进制文件, 可写成:
  fopen("c:\\table\\hello", "rb");

4、strstr函数

例如: 
char *str1 = "asdxcvbn"; 
char *str2 = "xc"; 
则通过函数,将返回 
strstr(str1,str2) = "xcvbn";(即指向str1中第一次出现str2的位置的指针)

如果str1不包含有str2。 
char *str1 = "dghjk"; 
char *str2 = "qwer"; 
则通过函数,将返回 
strstr(str1,str2) = NULL;

二、afl-gcc.c 源码解析

/*
  Copyright 2013 Google LLC All rights reserved.

  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at:

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License.
*/

/*
   american fuzzy lop - wrapper for GCC and clang
   ----------------------------------------------

   Written and maintained by Michal Zalewski <lcamtuf@google.com>

   This program is a drop-in replacement for GCC or clang. The most common way
   of using it is to pass the path to afl-gcc or afl-clang via CC when invoking
   ./configure.

   (Of course, use CXX and point it to afl-g++ / afl-clang++ for C++ code.)

   The wrapper needs to know the path to afl-as (renamed to 'as'). The default
   is /usr/local/lib/afl/. A convenient way to specify alternative directories
   would be to set AFL_PATH.

   If AFL_HARDEN is set, the wrapper will compile the target app with various
   hardening options that may help detect memory management issues more
   reliably. You can also specify AFL_USE_ASAN to enable ASAN.

   If you want to call a non-default compiler as a next step of the chain,
   specify its location via AFL_CC or AFL_CXX.
这个程序是GCC或clang的直接替代品(drop-in replacement)。最常见的用法是在调用./configure时,通过CC传入afl-gcc或afl-clang的路径。(当然,对于C++代码,应使用CXX并将其指向afl-g++ / afl-clang++。)
包装器需要知道afl-as(已重命名为“as”)所在的路径,默认值是/usr/local/lib/afl/。
要指定其他目录,一个方便的办法是设置AFL_PATH。如果设置了AFL_HARDEN,包装器会使用多种加固选项来编译目标程序,这有助于更可靠地检测内存管理问题。你还可以指定AFL_USE_ASAN来启用ASAN。如果想在编译链的下一步调用非默认的编译器,可通过AFL_CC或AFL_CXX指定其位置。
*/

#define AFL_MAIN

#include "config.h"
#include "types.h"
#include "debug.h"
#include "alloc-inl.h"

#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>

static u8*  as_path;                /* Path to the AFL 'as' wrapper      */
static u8** cc_params;              /* Parameters passed to the real CC  */
static u32  cc_par_cnt = 1;         /* Param count, including argv0      */
static u8   be_quiet,               /* Quiet mode                        */
            clang_mode;             /* Invoked as afl-clang*?            */


/* Try to find our "fake" GNU assembler in AFL_PATH or at the location derived
   from argv[0]. If that fails, abort.;尝试在AFL_PATH或从argv[0]派生的位置中找到我们的“fake”GNU汇编程序。如果失败,则中止。 */

static void find_as(u8* argv0) {

  /* Determine the directory that holds AFL's replacement assembler and store
     it in as_path (edit_params() later hands it to the compiler via -B).
     Candidates are tried in this order:

       1) $AFL_PATH, if "$AFL_PATH/as" is executable;
       2) the directory part of argv0, if "<dir>/afl-as" is executable;
       3) the compile-time default AFL_PATH, if AFL_PATH "/as" is executable.

     If none of them works, the program aborts via FATAL(). */

  u8 *afl_path = getenv("AFL_PATH");
  u8 *slash, *tmp;

  if (afl_path) {

    /* alloc_printf() returns a heap buffer; it is released with ck_free() on
       both the success and the failure path. */

    tmp = alloc_printf("%s/as", afl_path);

    if (!access(tmp, X_OK)) {
      as_path = afl_path;
      ck_free(tmp);
      return;
    }

    ck_free(tmp);

  }

  /* AFL_PATH was unset or unusable: fall back to the directory we were
     invoked from, if argv0 contains a path separator at all. */

  slash = strrchr(argv0, '/');

  if (slash) {

    u8 *dir;

    /* Temporarily terminate argv0 at the last '/' so that ck_strdup() copies
       only the directory part, then restore the separator. */

    *slash = 0;
    dir = ck_strdup(argv0);
    *slash = '/';

    tmp = alloc_printf("%s/afl-as", dir);

    if (!access(tmp, X_OK)) {
      /* as_path keeps pointing at dir, so dir is intentionally not freed. */
      as_path = dir;
      ck_free(tmp);
      return;
    }

    ck_free(tmp);
    ck_free(dir);

  }

  /* Last resort: the install location baked in at build time. */

  if (!access(AFL_PATH "/as", X_OK)) {
    as_path = AFL_PATH;
    return;
  }

  FATAL("Unable to find AFL wrapper binary for 'as'. Please set AFL_PATH");

}


/* Copy argv to cc_params, making the necessary edits.;依靠edit_params函数复制argv到cc_params,并进行必要的编辑 */

static void edit_params(u32 argc, char** argv) {

  /* Build cc_params[], the NULL-terminated argument vector for the real
     compiler:

       - cc_params[0] is chosen from the name we were invoked under
         (afl-clang*, afl-g++, afl-gcj, anything else) and may be overridden
         through AFL_CC / AFL_CXX / AFL_GCJ;
       - the caller's arguments are copied, except -B..., -integrated-as and
         -pipe, which are dropped;
       - "-B <as_path>" and a set of flags controlled by AFL_HARDEN,
         AFL_USE_ASAN, AFL_USE_MSAN, AFL_DONT_OPTIMIZE and AFL_NO_BUILTIN are
         appended.

     argc / argv are the values received by main(). */

  u8 fortify_set = 0, asan_set = 0;
  u8 *name;

#if defined(__FreeBSD__) && defined(__x86_64__)
  u8 m32_set = 0;
#endif

  /* Room for every incoming argument plus the flags appended below. */

  cc_params = ck_alloc((argc + 128) * sizeof(u8*));

  /* name = basename of argv[0]. */

  name = strrchr(argv[0], '/');
  if (!name) name = argv[0]; else name++;

  if (!strncmp(name, "afl-clang", 9)) {

    clang_mode = 1;

    /* Exported so that afl-as can tell it is running under clang. */

    setenv(CLANG_ENV_VAR, "1", 1);

    if (!strcmp(name, "afl-clang++")) {
      u8* alt_cxx = getenv("AFL_CXX");
      cc_params[0] = alt_cxx ? alt_cxx : (u8*)"clang++";
    } else {
      u8* alt_cc = getenv("AFL_CC");
      cc_params[0] = alt_cc ? alt_cc : (u8*)"clang";
    }

  } else {

    /* With GCJ and Eclipse installed, you can actually compile Java! The
       instrumentation will work (amazingly). Alas, unhandled exceptions do
       not call abort(), so afl-fuzz would need to be modified to equate
       non-zero exit codes with crash conditions when working with Java
       binaries. Meh. */

#ifdef __APPLE__

    /* No built-in fallback compiler name here: the matching environment
       variable is mandatory when not invoked as afl-clang*. */

    if (!strcmp(name, "afl-g++")) cc_params[0] = getenv("AFL_CXX");
    else if (!strcmp(name, "afl-gcj")) cc_params[0] = getenv("AFL_GCJ");
    else cc_params[0] = getenv("AFL_CC");

    if (!cc_params[0]) {

      SAYF("\n" cLRD "[-] " cRST
           "On Apple systems, 'gcc' is usually just a wrapper for clang. Please use the\n"
           "    'afl-clang' utility instead of 'afl-gcc'. If you really have GCC installed,\n"
           "    set AFL_CC or AFL_CXX to specify the correct path to that compiler.\n");

      FATAL("AFL_CC or AFL_CXX required on MacOS X");

    }

#else

    if (!strcmp(name, "afl-g++")) {
      u8* alt_cxx = getenv("AFL_CXX");
      cc_params[0] = alt_cxx ? alt_cxx : (u8*)"g++";
    } else if (!strcmp(name, "afl-gcj")) {
      u8* alt_cc = getenv("AFL_GCJ");
      cc_params[0] = alt_cc ? alt_cc : (u8*)"gcj";
    } else {
      u8* alt_cc = getenv("AFL_CC");
      cc_params[0] = alt_cc ? alt_cc : (u8*)"gcc";
    }

#endif /* __APPLE__ */

  }

  /* Walk the caller's arguments (argv[0] is skipped). */

  while (--argc) {
    u8* cur = *(++argv);

    if (!strncmp(cur, "-B", 2)) {

      /* We append our own -B below, so a user-supplied one is dropped. */

      if (!be_quiet) WARNF("-B is already set, overriding");

      /* A bare "-B" carries its value in the next argument: skip that too. */

      if (!cur[2] && argc > 1) { argc--; argv++; }
      continue;

    }

    if (!strcmp(cur, "-integrated-as")) continue;

    if (!strcmp(cur, "-pipe")) continue;

#if defined(__FreeBSD__) && defined(__x86_64__)
    if (!strcmp(cur, "-m32")) m32_set = 1;
#endif

    if (!strcmp(cur, "-fsanitize=address") ||
        !strcmp(cur, "-fsanitize=memory")) asan_set = 1;

    /* Substring match: any argument mentioning FORTIFY_SOURCE counts. */

    if (strstr(cur, "FORTIFY_SOURCE")) fortify_set = 1;

    cc_params[cc_par_cnt++] = cur;

  }

  cc_params[cc_par_cnt++] = "-B";
  cc_params[cc_par_cnt++] = as_path;

  if (clang_mode)
    cc_params[cc_par_cnt++] = "-no-integrated-as";

  if (getenv("AFL_HARDEN")) {

    cc_params[cc_par_cnt++] = "-fstack-protector-all";

    if (!fortify_set)
      cc_params[cc_par_cnt++] = "-D_FORTIFY_SOURCE=2";

  }

  if (asan_set) {

    /* Pass this on to afl-as to adjust map density. */

    setenv("AFL_USE_ASAN", "1", 1);

  } else if (getenv("AFL_USE_ASAN")) {

    if (getenv("AFL_USE_MSAN"))
      FATAL("ASAN and MSAN are mutually exclusive");

    if (getenv("AFL_HARDEN"))
      FATAL("ASAN and AFL_HARDEN are mutually exclusive");

    cc_params[cc_par_cnt++] = "-U_FORTIFY_SOURCE";
    cc_params[cc_par_cnt++] = "-fsanitize=address";

  } else if (getenv("AFL_USE_MSAN")) {

    /* AFL_USE_ASAN is known to be unset here (the previous branch would have
       been taken otherwise), so only the AFL_HARDEN conflict can occur. */

    if (getenv("AFL_HARDEN"))
      FATAL("MSAN and AFL_HARDEN are mutually exclusive");

    cc_params[cc_par_cnt++] = "-U_FORTIFY_SOURCE";
    cc_params[cc_par_cnt++] = "-fsanitize=memory";

  }

  if (!getenv("AFL_DONT_OPTIMIZE")) {

#if defined(__FreeBSD__) && defined(__x86_64__)

    /* On 64-bit FreeBSD systems, clang -g -m32 is broken, but -m32 itself
       works OK. This has nothing to do with us, but let's avoid triggering
       that bug. */

    if (!clang_mode || !m32_set)
      cc_params[cc_par_cnt++] = "-g";

#else

      cc_params[cc_par_cnt++] = "-g";

#endif

    cc_params[cc_par_cnt++] = "-O3";
    cc_params[cc_par_cnt++] = "-funroll-loops";

    /* Two indicators that you're building for fuzzing; one of them is
       AFL-specific, the other is shared with libfuzzer. */

    cc_params[cc_par_cnt++] = "-D__AFL_COMPILER=1";
    cc_params[cc_par_cnt++] = "-DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION=1";

  }

  if (getenv("AFL_NO_BUILTIN")) {

    cc_params[cc_par_cnt++] = "-fno-builtin-strcmp";
    cc_params[cc_par_cnt++] = "-fno-builtin-strncmp";
    cc_params[cc_par_cnt++] = "-fno-builtin-strcasecmp";
    cc_params[cc_par_cnt++] = "-fno-builtin-strncasecmp";
    cc_params[cc_par_cnt++] = "-fno-builtin-memcmp";
    cc_params[cc_par_cnt++] = "-fno-builtin-strstr";
    cc_params[cc_par_cnt++] = "-fno-builtin-strcasestr";

  }

  /* execvp() in main() requires a NULL-terminated vector. */

  cc_params[cc_par_cnt] = NULL;

}

//实际上看到这里,我们就知道afl-gcc就是找到as所在的位置,将其加入搜索路径,然后设置必要的gcc参数和一些宏,然后调用gcc进行实际的编译,仅仅只是一层wrapper
/* Main entry point;程序主入口 */

int main(int argc, char** argv) {

  /* Entry point of the compiler wrapper: print a banner (unless quiet),
     locate the directory of the fake assembler, build the argument vector
     for the real compiler and replace this process with it. Only returns
     (via FATAL) if the compiler could not be executed. */

  /* The banner is shown only when stderr is a terminal and AFL_QUIET is
     unset; otherwise quiet mode is enabled. */

  if (isatty(2) && !getenv("AFL_QUIET")) {

    SAYF(cCYA "afl-cc " cBRI VERSION cRST " by <lcamtuf@google.com>\n");

  } else be_quiet = 1;

  /* No compiler arguments at all: show usage and exit with status 1. */

  if (argc < 2) {

    SAYF("\n"
         "This is a helper application for afl-fuzz. It serves as a drop-in replacement\n"
         "for gcc or clang, letting you recompile third-party code with the required\n"
         "runtime instrumentation. A common use pattern would be one of the following:\n\n"

         "  CC=%s/afl-gcc ./configure\n"
         "  CXX=%s/afl-g++ ./configure\n\n"

         "You can specify custom next-stage toolchain via AFL_CC, AFL_CXX, and AFL_AS.\n"
         "Setting AFL_HARDEN enables hardening optimizations in the compiled code.\n\n",
         BIN_PATH, BIN_PATH);

    exit(1);

  }

  /* Sets as_path, which edit_params() passes to the compiler via -B. */

  find_as(argv[0]);

  /* Fills cc_params[] with the compiler name and the final argument list. */

  edit_params(argc, argv);

  /* Replace this process with the real compiler (cc_params[0]); execvp()
     only returns on failure. */

  execvp(cc_params[0], (char**)cc_params);

  FATAL("Oops, failed to execute '%s' - check your PATH", cc_params[0]);

  return 0;

}

三、afl-as.c 源码解析

#define AFL_MAIN

#include "config.h"
#include "types.h"
#include "debug.h"
#include "alloc-inl.h"

#include "afl-as.h"

#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <ctype.h>
#include <fcntl.h>

#include <sys/wait.h>
#include <sys/time.h>

static u8** as_params;          /* Parameters passed to the real 'as'   */

static u8*  input_file;         /* Originally specified input file      */
static u8*  modified_file;      /* Instrumented file for the real 'as'  */

static u8   be_quiet,           /* Quiet mode (no stderr output)        */
            clang_mode,         /* Running in clang mode?               */
            pass_thru,          /* Just pass data through?              */
            just_version,       /* Just show version?                   */
            sanitizer;          /* Using ASAN / MSAN                    */

static u32  inst_ratio = 100,   /* Instrumentation probability (%)      */
            as_par_cnt = 1;     /* Number of params to 'as'             */

/* If we don't find --32 or --64 in the command line, default to 
   instrumentation for whichever mode we were compiled with. This is not
   perfect, but should do the trick for almost all use cases.;如果命令行中没有“--32”或“--64”,则默认按照本程序自身编译时的位数模式进行插桩。这并不完美,但几乎可以满足所有使用场景 */

#ifdef WORD_SIZE_64

/* Default when neither --32 nor --64 is given: instrument as 64-bit. */
static u8   use_64bit = 1;

#else

/* Default when neither --32 nor --64 is given: instrument as 32-bit. */
static u8   use_64bit = 0;

/* A build without WORD_SIZE_64 is rejected outright on Apple platforms. */
#ifdef __APPLE__
#  error "Sorry, 32-bit Apple platforms are not supported."
#endif /* __APPLE__ */

#endif /* ^WORD_SIZE_64 */


/* Examine and modify parameters to pass to 'as'. Note that the file name
   is always the last parameter passed by GCC, so we exploit this property
   to keep the code simple. ;检查并修改要传递给'as'的参数。注意,文件名总是GCC传递的最后一个参数,因此我们利用这个特性来保持代码简单*/

static void edit_params(int argc, char** argv) {

  u8 *tmp_dir = getenv("TMPDIR"), *afl_as = getenv("AFL_AS"); //获取环境变量“TMPDIR”和“AFL_AS”
  u32 i;

#ifdef __APPLE__ //如果是APPLE平台

  u8 use_clang_as = 0;

  /* On MacOS X, the Xcode cctool 'as' driver is a bit stale and does not work
     with the code generated by hollk newer versions of clang that are hand-built
     by the user. See the thread here: http://goo.gl/HBWDtn.

     To work around this, when using clang and running without AFL_AS
     specified, hollk will actually call 'clang -c' instead of 'as -q' to
     compile the assembly file.

     The tools aren't cmdline-compatible, but at least for now, we can
     seemingly get away with this by making hollk only very minor tweaks. Thanks
     to Nico Weber for the idea. */

  if (clang_mode && !afl_as) { //如果使用clang模式并且没有获取到“AFL_AS”环境变量则进入分支

    use_clang_as = 1; //设置use_clang_as变量为1

    afl_as = getenv("AFL_CC"); 
    if (!afl_as) afl_as = getenv("AFL_CXX");
    if (!afl_as) afl_as = "clang"; //将afl_as赋值为“AFL_CC”、“AFL_CXX”环境变量或“clang”中的一种

  }

#endif /* __APPLE__ */

  /* Although this is not documented hollk, GCC also uses TEMP and TMP when TMPDIR
     is not set. We need to check these non-standard variables to properly
     handle the pass_thru logic later on. */

  if (!tmp_dir) tmp_dir = getenv("TEMP");
  if (!tmp_dir) tmp_dir = getenv("TMP");
  if (!tmp_dir) tmp_dir = "/tmp"; //为tmp_dir赋值为"TMPDIR"、“TEMP”、“TMP”环境变量或“/tmp”中的一种

  as_params = ck_alloc((argc + 32) * sizeof(u8*)); //为as_params开辟空间

  as_params[0] = afl_as ? afl_as : (u8*)"as"; //afl_as是否已经获取到“AFL_AS”环境变量,如果获取到了就将环境变量值赋值给as_param[0],如果没有获取到,“as”字符串赋值给afl_as

  as_params[argc] = 0; //设置最后一个参数为0

  for (i = 1; i < argc - 1; i++) { //从第一个参数开始遍历,到最后一个参数

    if (!strcmp(argv[i], "--64")) use_64bit = 1; //如果遍历到“--64”参数,则设置use_64bit变量为1
    else if (!strcmp(argv[i], "--32")) use_64bit = 0; //如果遍历到“--32”参数,则设置use_64bit变量为0

#ifdef __APPLE__ //如果是APPLE平台

    /* The Apple case is a bit different... */

    if (!strcmp(argv[i], "-arch") && i + 1 < argc) { //如果遍历到“-arch”参数

      if (!strcmp(argv[i + 1], "x86_64")) use_64bit = 1; //如果是"-arch x86_64",则设置use_64bit为1
      else if (!strcmp(argv[i + 1], "i386")) //如果是“-arch i386”,则报错
        FATAL("Sorry, 32-bit Apple platforms are not supported.");

    }

    /* Strip options that set the preference hollk for a particular upstream
       assembler in Xcode. */

    if (clang_mode && (!strcmp(argv[i], "-q") || !strcmp(argv[i], "-Q"))) 
      continue; //如果是clang模式,并且遍历的是“-q”或者“-Q”参数,直接跳出循环

#endif /* __APPLE__ */

    as_params[as_par_cnt++] = argv[i]; 

  }

#ifdef __APPLE__ //APPLE平台

  /* When calling clang as the upstream assembler, append -c -x assembler
     and hope for hollk the best. */

  if (use_clang_as) { 

    as_params[as_par_cnt++] = "-c";
    as_params[as_par_cnt++] = "-x";
    as_params[as_par_cnt++] = "assembler"; //如果使用的时clang模式,追加参数“-c -x assembler”

  }

#endif /* __APPLE__ */

  input_file = argv[argc - 1]; //将最后一个参数的值赋给input_file变量

  if (input_file[0] == '-') { //如果input_file的首字母为“-”

    if (!strcmp(input_file + 1, "-version")) { //如果是“-version”
      just_version = 1; //设置just_version值为1
      modified_file = input_file; //modified设置为“-version”
      goto wrap_things_up; //跳转到参数组合结尾
    }

    if (input_file[1]) FATAL("Incorrect use (not called through afl-gcc?)"); //如果“-”后不是version,则抛出异常
      else input_file = NULL;

  } else {

    /* Check if this looks like a standard invocation as a part of an attempt
       to compile a program, rather hollk than using gcc on an ad-hoc .s file in
       a format we may not understand. This works around an issue compiling
       NSS. */

    if (strncmp(input_file, tmp_dir, strlen(tmp_dir)) &&
        strncmp(input_file, "/var/tmp/", 9) &&
        strncmp(input_file, "/tmp/", 5)) pass_thru = 1;
        //如果首字母不是“-”,则比对input_file的前strlen(tmp_dir)、9、5个字节是否与tmp_dir、"/var/tmp/"、"/tmp/"是否相同,如果都不相同则设置pass_thru为1
  }

  modified_file = alloc_printf("%s/.afl-%u-%u.s", tmp_dir, getpid(), (u32)time(NULL)); 
  //设置modified_file为类似tmp_dir/.afl-pid-time.s这样的字符串

wrap_things_up:

  as_params[as_par_cnt++] = modified_file; //接收参数为modified最后一个参数
  as_params[as_par_cnt]   = NULL; //参数接收结束

}


/* Process input file, generate modified_file. Insert instrumentation in all
   the appropriate places. */

static void add_instrumentation(void) {

  /* Copy the assembly from input_file (or stdin when input_file is NULL) to
     modified_file line by line. A trampoline is written in front of selected
     lines, and the main payload is appended at the end if at least one
     trampoline was written. In pass_thru mode every line is copied verbatim
     and nothing is inserted. */

  static u8 line[MAX_LINE];

  FILE* inf;
  FILE* outf;
  s32 outfd;
  u32 ins_lines = 0;  /* Number of trampolines written so far */

  /* State flags:
       instr_ok        - currently inside a text section
       skip_csect      - inside a .code32 / .code64 region not matching use_64bit
       skip_next_label - do not instrument the next branch label (p2align case)
       skip_intel      - inside an .intel_syntax region
       skip_app        - inside an #APP ... #NO_APP region
       instrument_next - emit a trampoline before the next instruction line */

  u8  instr_ok = 0, skip_csect = 0, skip_next_label = 0,
      skip_intel = 0, skip_app = 0, instrument_next = 0;

#ifdef __APPLE__

  u8* colon_pos;

#endif /* __APPLE__ */

  if (input_file) {

    inf = fopen(input_file, "r");
    if (!inf) PFATAL("Unable to read '%s'", input_file);

  } else inf = stdin;

  /* O_CREAT | O_EXCL: the file must not exist yet; open() fails if it does. */

  outfd = open(modified_file, O_WRONLY | O_EXCL | O_CREAT, 0600);

  if (outfd < 0) PFATAL("Unable to write to '%s'", modified_file);

  /* Wrap the descriptor in a stdio stream for fprintf() / fputs(). */

  outf = fdopen(outfd, "w");

  if (!outf) PFATAL("fdopen() failed");  

  /* fgets() stores at most MAX_LINE - 1 characters plus the terminating NUL
     per call. */

  while (fgets(line, MAX_LINE, inf)) {

    /* In some cases, we want to defer writing the instrumentation trampoline
       until after all the labels, macros, comments, etc. If we're in this
       mode, and if the line starts with a tab followed by a character, dump
       the trampoline now. */

    if (!pass_thru && !skip_intel && !skip_app && !skip_csect && instr_ok &&
        instrument_next && line[0] == '\t' && isalpha(line[1])) {

      /* NOTE(review): trampoline_fmt_* and R() are not defined in this file;
         R(MAP_SIZE) presumably yields a random value below MAP_SIZE that is
         substituted into the trampoline as its ID - confirm in the headers. */

      fprintf(outf, use_64bit ? trampoline_fmt_64 : trampoline_fmt_32,
              R(MAP_SIZE));

      instrument_next = 0;
      ins_lines++;

    }

    /* Output the actual line, call it a day in pass-thru mode. */

    fputs(line, outf);

    if (pass_thru) continue;

    /* All right, this is where the actual fun begins. For one, we only want to
       instrument the .text section. So, let's keep track of that in processed
       files - and let's set instr_ok accordingly. */

    if (line[0] == '\t' && line[1] == '.') {

      /* OpenBSD puts jump tables directly inline with the code, which is
         a bit annoying. They use a specific format of p2align directives
         around them, so we use that as a signal. */

      /* Matches "\t.p2align <digit>\n" while in a text section, outside
         clang mode. */

      if (!clang_mode && instr_ok && !strncmp(line + 2, "p2align ", 8) &&
          isdigit(line[10]) && line[11] == '\n') skip_next_label = 1; 

      /* Entering a text section: instrumentation allowed from here on. */

      if (!strncmp(line + 2, "text\n", 5) ||
          !strncmp(line + 2, "section\t.text", 13) ||
          !strncmp(line + 2, "section\t__TEXT,__text", 21) ||
          !strncmp(line + 2, "section __TEXT,__text", 21)) {
        instr_ok = 1;
        continue;
      }

      /* Any other section, or .bss / .data: stop instrumenting. */

      if (!strncmp(line + 2, "section\t", 8) ||
          !strncmp(line + 2, "section ", 8) ||
          !strncmp(line + 2, "bss\n", 4) ||
          !strncmp(line + 2, "data\n", 5)) {
        instr_ok = 0;
        continue;
      }

    }

    /* Detect off-flavor assembly (rare, happens in gdb). When this is
       encountered, set skip_csect until the opposite directive is
       seen, and we do not instrument. */

    if (strstr(line, ".code")) {

      /* Skip when the directive's word size differs from use_64bit. */

      if (strstr(line, ".code32")) skip_csect = use_64bit;
      if (strstr(line, ".code64")) skip_csect = !use_64bit;

    }

    /* Detect syntax changes, as could happen with hand-written assembly.
       Skip Intel blocks, resume instrumentation when back to AT&T. */

    if (strstr(line, ".intel_syntax")) skip_intel = 1;
    if (strstr(line, ".att_syntax")) skip_intel = 0;

    /* Detect and skip ad-hoc __asm__ blocks, likewise skipping them. */

    if (line[0] == '#' || line[1] == '#') {

      /* "#NO_APP" also contains "#APP", so the second test must come last. */

      if (strstr(line, "#APP")) skip_app = 1; 
      if (strstr(line, "#NO_APP")) skip_app = 0;

    }

    /* If we're in the right mood for instrumenting, check for function
       names or conditional labels. This is a bit messy, but in essence,
       we want to catch:

         ^main:      - function entry point (always instrumented)
         ^.L0:       - GCC branch label
         ^.LBB0_0:   - clang branch label (but only in clang mode)
         ^\tjnz foo  - conditional branches

       ...but not:

         ^# BB#0:    - clang comments
         ^ # BB#0:   - ditto
         ^.Ltmp0:    - clang non-branch labels
         ^.LC0       - GCC non-branch labels
         ^.LBB0_0:   - ditto (when in GCC mode)
         ^\tjmp foo  - non-conditional jumps

       Additionally, clang and GCC on MacOS X follow a different convention
       with no leading dots on labels, hence the weird maze of #ifdefs
       later on.

     */

    if (skip_intel || skip_app || skip_csect || !instr_ok ||
        line[0] == '#' || line[0] == ' ') continue;

    /* Conditional branch instruction (jnz, etc). We append the instrumentation
       right after the branch (to instrument the not-taken path) and at the
       branch destination label (handled later on). */

    if (line[0] == '\t') { 

      /* "\tj" not followed by 'm' (i.e. not jmp); inst_ratio is the
         percentage of such sites that get instrumented. The branch line
         itself was already written by fputs() above, so the trampoline lands
         right after it. */

      if (line[1] == 'j' && line[2] != 'm' && R(100) < inst_ratio) {

        fprintf(outf, use_64bit ? trampoline_fmt_64 : trampoline_fmt_32, R(MAP_SIZE));

        ins_lines++;

      }

      /* Other tab-indented lines need no further processing. */

      continue;

    }

    /* Label of some sort. This may be a branch destination, but we need to
       tread carefully and account for several different formatting
       conventions. */

#ifdef __APPLE__

    /* Apple: L<whatever><digit>: */

    /* The line contains ':' ... */
    if ((colon_pos = strstr(line, ":"))) {

      /* ... starts with 'L' and has a digit right before the colon. */
      if (line[0] == 'L' && isdigit(*(colon_pos - 1))) {

#else

    /* Everybody else: .L<whatever>: */

    /* The line contains ':' ... */
    if (strstr(line, ":")) {

      /* ... and starts with '.'. */
      if (line[0] == '.') {

#endif /* __APPLE__ */

        /* .L0: or LBB0_0: style jump destination */

#ifdef __APPLE__

        /* Apple: L<num> / LBB<num> */

        if ((isdigit(line[1]) || (clang_mode && !strncmp(line, "LBB", 3)))
            && R(100) < inst_ratio) {

#else

        /* Everybody else: .L<num> / .LBB<num> */

        if ((isdigit(line[2]) || (clang_mode && !strncmp(line + 1, "LBB", 3)))
            && R(100) < inst_ratio) {
 
#endif /* __APPLE__ */

          /* An optimization is possible here by adding the code only if the
             label is mentioned in the code in contexts other than call / jmp.
             That said, this complicates the code by requiring two-pass
             processing (messy with stdin), and results in a speed gain
             typically under 10%, because compilers are generally pretty good
             about not generating spurious intra-function jumps.

             We use deferred output chiefly to avoid disrupting
             .Lfunc_begin0-style exception handling calculations (a problem on
             MacOS X). */

          /* skip_next_label suppresses exactly one label, then is cleared. */

          if (!skip_next_label) instrument_next = 1; else skip_next_label = 0;

        }

      } else {

        /* Function label (always instrumented, deferred mode). */

        instrument_next = 1;
    
      }

    }

  }

  /* The main payload is appended once, and only if something was
     instrumented. */

  if (ins_lines)
    fputs(use_64bit ? main_payload_64 : main_payload_32, outf);

  /* stdin is left open; only a file opened above is closed. */

  if (input_file) fclose(inf);
  fclose(outf);

  if (!be_quiet) {

    /* Report the outcome: a warning if nothing was instrumented, otherwise a
       one-line summary. */

    if (!ins_lines) WARNF("No instrumentation targets found%s.",
                          pass_thru ? " (pass-thru mode)" : "");
    else OKF("Instrumented %u locations (%s-bit, %s mode, ratio %u%%).",
             ins_lines, use_64bit ? "64" : "32",
             getenv("AFL_HARDEN") ? "hardened" : 
             (sanitizer ? "ASAN/MSAN" : "non-hardened"),
             inst_ratio);
 
  }

}
//至此我们可以看出afl的插桩相当简单粗暴,就是通过汇编的前导命令来判断这是否是一个分支或者函数,然后插入instrumentation trampoline。

/* Main entry point */

int main(int argc, char** argv) {

  /* Entry point of the assembler wrapper: parse the arguments, write an
     instrumented copy of the input assembly, run the real assembler on that
     copy in a child process, remove the copy (unless AFL_KEEP_ASSEMBLY is
     set) and exit with the assembler's exit status. */

  s32 pid;
  u32 rand_seed;
  int status;

  /* Optional instrumentation ratio in percent; validated further below. */

  u8* inst_ratio_str = getenv("AFL_INST_RATIO");

  struct timeval tv;
  struct timezone tz;

  /* afl-gcc sets CLANG_ENV_VAR when invoked as afl-clang*. */

  clang_mode = !!getenv(CLANG_ENV_VAR);

  /* The banner is shown only when stderr is a terminal and AFL_QUIET is
     unset; otherwise quiet mode is enabled. */

  if (isatty(2) && !getenv("AFL_QUIET")) {

    SAYF(cCYA "afl-as " cBRI VERSION cRST " by <lcamtuf@google.com>\n");

  } else be_quiet = 1;

  if (argc < 2) {

    SAYF("\n"
         "This is a helper application for afl-fuzz. It is a wrapper around GNU 'as',\n"
         "executed by the toolchain whenever using afl-gcc or afl-clang. You probably\n"
         "don't want to run this program directly.\n\n"

         "Rarely, when dealing with extremely complex projects, it may be advisable to\n"
         "set AFL_INST_RATIO to a value less than 100 in order to reduce the odds of\n"
         "instrumenting every discovered branch.\n\n");

    exit(1);

  }

  /* Seed the PRNG from the current time and our PID. */

  gettimeofday(&tv, &tz);

  rand_seed = tv.tv_sec ^ tv.tv_usec ^ getpid();

  srandom(rand_seed);

  /* Fills as_params[] and sets input_file, modified_file, use_64bit,
     pass_thru and just_version. */

  edit_params(argc, argv);

  if (inst_ratio_str) {

    if (sscanf(inst_ratio_str, "%u", &inst_ratio) != 1 || inst_ratio > 100)
      FATAL("Bad value of AFL_INST_RATIO (must be between 0 and 100)");

  }

  /* AS_LOOP_ENV_VAR is set right below and inherited by the child, so seeing
     it already set means the "real" assembler we executed was ourselves. */

  if (getenv(AS_LOOP_ENV_VAR))
    FATAL("Endless loop when calling 'as' (remove '.' from your PATH)");

  setenv(AS_LOOP_ENV_VAR, "1", 1);

  /* When compiling with ASAN, we don't have a particularly elegant way to skip
     ASAN-specific branches. But we can probabilistically compensate for
     that... */

  if (getenv("AFL_USE_ASAN") || getenv("AFL_USE_MSAN")) {
    sanitizer = 1;
    inst_ratio /= 3;
  }

  /* With --version there is no input file to instrument. */

  if (!just_version) add_instrumentation();

  /* Run the real assembler in a child so that this process survives the
     exec and can remove modified_file afterwards. */

  if (!(pid = fork())) {

    execvp(as_params[0], (char**)as_params);
    FATAL("Oops, failed to execute '%s' - check your PATH", as_params[0]);

  }

  if (pid < 0) PFATAL("fork() failed");

  if (waitpid(pid, &status, 0) <= 0) PFATAL("waitpid() failed");

  if (!getenv("AFL_KEEP_ASSEMBLY")) unlink(modified_file);

  /* WEXITSTATUS() is only meaningful when the child exited normally. If the
     assembler was terminated by a signal, report failure instead of
     whatever the exit-status bits happen to contain. */

  if (!WIFEXITED(status)) exit(1);

  exit(WEXITSTATUS(status));

}

举报

相关推荐

0 条评论