Glibc Heap Exploit 坐牢笔记 - 0x05

0xAkyOI

2024-01-25 (Updated: 2024-09-30)

# libc-cmp : libc-2.23 AND libc-2.27

# Tcache 定义

# Tcache 的结构和 chunk 的对比：

/* Layout of a heap chunk: two size words followed by link pointers
   which, per the field comments below, are only used while the chunk
   is free (and the *_nextsize pair only for large blocks).  */
struct malloc_chunk {

  INTERNAL_SIZE_T      prev_size;  /* Size of previous chunk (if free).  */
  INTERNAL_SIZE_T      size;       /* Size in bytes, including overhead. */

  struct malloc_chunk* fd;         /* double links -- used only if free. */
  struct malloc_chunk* bk;

  /* Only used for large blocks: pointer to next larger size.  */
  struct malloc_chunk* fd_nextsize; /* double links -- used only if free. */
  struct malloc_chunk* bk_nextsize;
};

/* Node of a tcache bin's list.  It holds nothing but the link to the
   next entry, so each bin is a singly linked list.  */
typedef struct tcache_entry
{
  struct tcache_entry *next;  /* Next entry in the same bin.  */
} tcache_entry;

/* Bookkeeping for the whole tcache: for every bin index, how many
   entries are cached and where that bin's list starts.  */
typedef struct tcache_perthread_struct
{
  /* Number of entries currently held in each bin.  */
  char counts[TCACHE_MAX_BINS];
  /* Head of each bin's singly linked list.  */
  tcache_entry *entries[TCACHE_MAX_BINS];
} tcache_perthread_struct;

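可以看到 tcache_entry 只有一个 next 指针，用来像 fastbin 一样把同一个 tcache bin 里的 chunk 串联起来。注意它覆盖的位置是 chunk 的 user data 起始处（即 chunk2mem 返回的地址，对应 malloc_chunk 中 fd 的位置），而不是 prev_size 的位置；因此 next 指向的也是下一个 chunk 的 user data，而不是 chunk 头。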
每个 thread 都会创建一个 tcache_perthread_struct 用来存储 tcache 相关信息，其中记录了 Tcache 中每个 tcache bin 的 chunk 的数目以及每个 tcache bin 的头节点

但是那是 2.27 前期的代码，后期添加了一个检测 double free 的 key，于是结构体变成了 :

/* We overlay this structure on the user-data portion of a chunk when
   the chunk is stored in the per-thread cache.  */
typedef struct tcache_entry
{
  /* Next entry in the same bin.  */
  struct tcache_entry *next;
  /* This field exists to detect double frees.  */
  struct tcache_perthread_struct *key;
} tcache_entry;

# Tcache 相关常量 && 宏

#if USE_TCACHE
/* We want 64 entries.  This is an arbitrary limit, which tunables can reduce.  */
# define TCACHE_MAX_BINS		64
# define MAX_TCACHE_SIZE	tidx2usize (TCACHE_MAX_BINS-1)

/* Only used to pre-fill the tunables.  */
# define tidx2usize(idx)	(((size_t) idx) * MALLOC_ALIGNMENT + MINSIZE - SIZE_SZ)

/* When "x" is from chunksize().  */
# define csize2tidx(x) (((x) - MINSIZE + MALLOC_ALIGNMENT - 1) / MALLOC_ALIGNMENT)
/* When "x" is a user-provided size.  */
# define usize2tidx(x) csize2tidx (request2size (x))

/* With rounding and alignment, the bins are...
   idx 0   bytes 0..24 (64-bit) or 0..12 (32-bit)
   idx 1   bytes 25..40 or 13..20
   idx 2   bytes 41..56 or 21..28
   etc.  */

/* This is another arbitrary limit, which tunables can change.  Each
   tcache bin will hold at most this number of chunks.  */
# define TCACHE_FILL_COUNT 7
#endif

可以看到 tcache 最多 64 个 bins，每个 tcache bin 最多存储 7 个 chunk
还定义了 size，idx 互换的宏。

新版本添加了：

#define MAX_TCACHE_COUNT UINT16_MAX /* Maximum value of counts[] entries. */

规定了 counts[] 中每一项（即每个 tcache bin 的 chunk 计数）的最大值，这里是 UINT16_MAX（65535）；在 counts 仍为 char 类型的版本中，这个上限是 127。估计是为了防止计数溢出。

# Tcache 插入方式

/* Push CHUNK onto the front of tcache bin TC_IDX and bump that bin's
   count.  Caller must ensure that we know tc_idx is valid and there's
   room for more chunks.  */
static __always_inline void
tcache_put (mchunkptr chunk, size_t tc_idx)
{
  /* The list node lives at the address chunk2mem yields for CHUNK.  */
  tcache_entry *e = (tcache_entry *) chunk2mem (chunk);
  assert (tc_idx < TCACHE_MAX_BINS);
  /* Link in front of the current head, then make E the new head.  */
  e->next = tcache->entries[tc_idx];
  tcache->entries[tc_idx] = e;
  ++(tcache->counts[tc_idx]);
}

/* Pop the head entry of tcache bin TC_IDX and return it as a void
   pointer.  Caller must ensure that we know tc_idx is valid and
   there's available chunks to remove.  */
static __always_inline void *
tcache_get (size_t tc_idx)
{
  /* Note: the head is read before either assertion below runs.  */
  tcache_entry *e = tcache->entries[tc_idx];
  assert (tc_idx < TCACHE_MAX_BINS);
  /* The bin must have a non-null head.  */
  assert (tcache->entries[tc_idx] > 0);
  /* E->next becomes the new head as-is; it is not validated here.  */
  tcache->entries[tc_idx] = e->next;
  --(tcache->counts[tc_idx]);
  return (void *) e;
}

通过代码可以知道把 chunk 插入到 Tcache bin 的方法和 fastbin 类似，都是 FILO（LIFO）。
同样取出 chunk 的方法也和 fastbin 类似。

但是新版本又改了 put 和 get，用于处理 key：
可以知道 key 的数值就是 tcache 指针本身，即 tcache_perthread_struct 的地址（该结构体所在 chunk 的 user data 地址，而不是 chunk 头的地址）
并且取出的时候赋值为 NULL

/* Push CHUNK onto the front of tcache bin TC_IDX, stamp it with the
   double-free key and bump that bin's count.  Caller must ensure that
   we know tc_idx is valid and there's room for more chunks.  */
static __always_inline void tcache_put (mchunkptr chunk, size_t tc_idx)
{
  /* The list node lives at the address chunk2mem yields for CHUNK.  */
  tcache_entry *e = (tcache_entry *) chunk2mem (chunk);
  assert (tc_idx < TCACHE_MAX_BINS);

  /* Mark this chunk as "in the tcache" so the test in _int_free will
     detect a double free.  */
  e->key = tcache;

  /* Link in front of the current head, then make E the new head.  */
  e->next = tcache->entries[tc_idx];
  tcache->entries[tc_idx] = e;
  ++(tcache->counts[tc_idx]);
}
/* Pop the head entry of tcache bin TC_IDX, clear its key and return it
   as a void pointer.  Caller must ensure that we know tc_idx is valid
   and there's available chunks to remove.  */
static __always_inline void *
tcache_get (size_t tc_idx)
{
  /* Note: the head is read before either assertion below runs.  */
  tcache_entry *e = tcache->entries[tc_idx];
  assert (tc_idx < TCACHE_MAX_BINS);
  /* The bin must have a non-null head.  */
  assert (tcache->entries[tc_idx] > 0);
  /* E->next becomes the new head as-is; it is not validated here.  */
  tcache->entries[tc_idx] = e->next;
  --(tcache->counts[tc_idx]);
  /* The entry is leaving the tcache, so drop the "in the tcache" mark
     that tcache_put stored in key.  */
  e->key = NULL;
  return (void *) e;
}

# Tcache 的 shutdown 和 Init

直接看注释。可以看到它是把每个 Tcache 中的 chunk 当作 in use 的 chunk 进行处理的，所以 shutdown 的时候把每个 Tcache bin 中的 chunk 都调用__libc_free () 进行正常的 free。
从 init 过程中我们可以知道使用 tcache_perthread_struct 结构体的 tcache 是作为一个 chunk 而存在的，它就是记录着每个 Tcache bin 信息的 chunk

/* Tear down the current tcache: pass every cached entry, and then the
   tcache structure itself, to __libc_free.  Does nothing when no
   tcache exists.  */
static void
tcache_thread_shutdown (void)
{
  int i;
  /* Keep a private pointer; the global one is cleared below.  */
  tcache_perthread_struct *tcache_tmp = tcache;

  if (!tcache)
    return;

  /* Disable the tcache and prevent it from being reinitialized.
     (tcache_init returns early once tcache_shutting_down is set.)  */
  tcache = NULL;
  tcache_shutting_down = true;

  /* Free all of the entries and the tcache itself back to the arena
     heap for coalescing.  */
  for (i = 0; i < TCACHE_MAX_BINS; ++i)
    {
      while (tcache_tmp->entries[i])
	{
	  /* Unlink the head before freeing it.  */
	  tcache_entry *e = tcache_tmp->entries[i];
	  tcache_tmp->entries[i] = e->next;
	  __libc_free (e);
	}
    }

  __libc_free (tcache_tmp);
}

/* Allocate a tcache_perthread_struct through _int_malloc, zero it and
   install it as tcache.  Does nothing while the tcache is shutting
   down, and leaves tcache untouched if the allocation fails.  */
static void
tcache_init(void)
{
  mstate ar_ptr;
  void *victim = 0;
  const size_t bytes = sizeof (tcache_perthread_struct);

  if (tcache_shutting_down)
    return;

  arena_get (ar_ptr, bytes);
  victim = _int_malloc (ar_ptr, bytes);
  /* First attempt failed but an arena was obtained: retry once with
     whatever arena arena_get_retry hands back.  */
  if (!victim && ar_ptr != NULL)
    {
      ar_ptr = arena_get_retry (ar_ptr, bytes);
      victim = _int_malloc (ar_ptr, bytes);
    }


  /* NOTE(review): presumably arena_get / arena_get_retry return with
     the arena mutex held -- confirm; it is released here.  */
  if (ar_ptr != NULL)
    __libc_lock_unlock (ar_ptr->mutex);

  /* In a low memory situation, we may not be able to allocate memory
     - in which case, we just keep trying later.  However, we
     typically do this very early, so either there is sufficient
     memory, or there isn't enough memory to do non-trivial
     allocations anyway.  */
  if (victim)
    {
      /* Zeroing leaves every count at 0 and every bin head NULL.  */
      tcache = (tcache_perthread_struct *) victim;
      memset (tcache, 0, sizeof (tcache_perthread_struct));
    }

}

# __libc_malloc 函数关于 Tcache 的变化

#if USE_TCACHE
  /* int_free also calls request2size, be careful to not pad twice.  */
  size_t tbytes;
  checked_request2size (bytes, tbytes);
  size_t tc_idx = csize2tidx (tbytes);

  MAYBE_INIT_TCACHE ();

  DIAG_PUSH_NEEDS_COMMENT;
  if (tc_idx < mp_.tcache_bins
      /*&& tc_idx < TCACHE_MAX_BINS*/ /* to appease gcc */
      && tcache
      && tcache->entries[tc_idx] != NULL)
    {
      return tcache_get (tc_idx);
    }
  DIAG_POP_NEEDS_COMMENT;
#endif

增加了对 Tcache 的判断
干的事就是判断 Tcache 里面是否有刚好满足大小需求 chunk，如果有，就取出再 return 它就 OK 了。此外判断了 chunk 大小是否在 Tcache 大小范围内

之后就是_int_malloc 的变化，写在注释了：

#if USE_TCACHE
  size_t tcache_unsorted_count;	    /* count of unsorted chunks processed */
#endif


//在fastbin范围内，取出fastbin的chunk，不断把这个bin里面的chunk扔到Tcache bin里面，直到这个Tcache bin满为止
#if USE_TCACHE
	      /* While we're here, if we see other chunks of the same size,
		 stash them in the tcache.  */
	      size_t tc_idx = csize2tidx (nb);
	      if (tcache && tc_idx < mp_.tcache_bins)
		{
		  mchunkptr tc_victim;

		  /* While bin not empty and tcache not full, copy chunks.  */
		  while (tcache->counts[tc_idx] < mp_.tcache_count
			 && (tc_victim = *fb) != NULL)
		    {
		      if (SINGLE_THREAD_P)
			*fb = tc_victim->fd;
		      else
			{
			  REMOVE_FB (fb, pp, tc_victim);
			  if (__glibc_unlikely (tc_victim == NULL))
			    break;
			}
		      tcache_put (tc_victim, tc_idx);
		    }
		}
#endif

//small bin范围内，同fastbin
#if USE_TCACHE
	  /* While we're here, if we see other chunks of the same size,
	     stash them in the tcache.  */
	  size_t tc_idx = csize2tidx (nb);
	  if (tcache && tc_idx < mp_.tcache_bins)
	    {
	      mchunkptr tc_victim;

	      /* While bin not empty and tcache not full, copy chunks over.  */
	      while (tcache->counts[tc_idx] < mp_.tcache_count
		     && (tc_victim = last (bin)) != bin)
		{
		  if (tc_victim != 0)
		    {
		      bck = tc_victim->bk;
		      set_inuse_bit_at_offset (tc_victim, nb);
		      if (av != &main_arena)
			set_non_main_arena (tc_victim);
		      bin->bk = bck;
		      bck->fd = bin;

		      tcache_put (tc_victim, tc_idx);
	            }
		}
	    }
#endif

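//这一段没有搬运 chunk，只是做初始化：若 nb 在 tcache 范围内就记录 tcache_nb，并把 return_cached、tcache_unsorted_count 清零，供后面处理 unsorted bin 时使用。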
#if USE_TCACHE
  INTERNAL_SIZE_T tcache_nb = 0;
  size_t tc_idx = csize2tidx (nb);
  if (tcache && tc_idx < mp_.tcache_bins)
    tcache_nb = nb;
  int return_cached = 0;

  tcache_unsorted_count = 0;
#endif

下面是在「从 unsorted bin 取出 chunk 并直接返回该 chunk」的分支里添加的内容。
如果 unsorted bin 中的 chunk 大小恰好满足需求（size == nb），而且对应的 Tcache bin 没有满，就先把它放进 Tcache bin、置 return_cached = 1，然后 continue 去处理下一个 unsorted chunk，而不是立刻返回（乍一看很迷惑，其实相当于前面那种不断取出 chunk 再放入 Tcache bin 的过程，一直放到 Tcache 满，最后再从 Tcache 里取一个返回）。但是如果 Tcache bin 已经满了，就直接 return 这个 chunk。
这样看实现的功能和前面差不多

/* Take now instead of binning if exact fit */
          if (size == nb)
            {
              set_inuse_bit_at_offset (victim, size);
              if (av != &main_arena)
		set_non_main_arena (victim);
#if USE_TCACHE
	      /* Fill cache first, return to user only if cache fills.
		 We may return one of these chunks later.  */
	      if (tcache_nb
		  && tcache->counts[tc_idx] < mp_.tcache_count)
		{
		  tcache_put (victim, tc_idx);
		  return_cached = 1;
		  continue;
		}
	      else
		{
#endif
              check_malloced_chunk (av, victim, nb);
              void *p = chunk2mem (victim);
              alloc_perturb (p, bytes);
              return p;
#if USE_TCACHE
		}
#endif
            }

在 chunk 从 unsorted bin 取出进入 large bin 的分支的程序：
这段代码是用于处理内存分配的缓存（tcache）的。tcache 是 glibc malloc 中的一种优化机制（并不是 C 语言本身的特性），用于缓存较小的、已释放的内存块，以便在需要时快速分配。
代码中的两个 #if USE_TCACHE 条件编译块分别处理两种情况：
如果已经处理了足够多的缓存块，并且允许返回缓存的块，那么将返回一个已缓存的块。这是通过增加 tcache_unsorted_count 计数器来实现的，然后检查是否超过了限制 mp_.tcache_unsorted_limit。如果超过了限制，就调用 tcache_get (tc_idx) 函数来获取并返回一个已缓存的块。
如果所有找到的小内存块都已经被缓存，那么现在返回一个。这是通过检查 return_cached 变量来实现的。如果 return_cached 为真，那么就调用 tcache_get (tc_idx) 函数来获取并返回一个已缓存的块。
这段代码的主要目的是在内存分配时使用 tcache 来提高性能。
（人工智能这时候还挺靠谱）

#if USE_TCACHE
      /* If we've processed as many chunks as we're allowed while
	 filling the cache, return one of the cached ones.  */
      ++tcache_unsorted_count;
      if (return_cached
	  && mp_.tcache_unsorted_limit > 0
	  && tcache_unsorted_count > mp_.tcache_unsorted_limit)
	{
	  return tcache_get (tc_idx);
	}
#endif

#if USE_TCACHE
      /* If all the small chunks we found ended up cached, return one now.  */
      if (return_cached)
	{
	  return tcache_get (tc_idx);
	}
#endif

# __libc_free 函数关于 Tcache 的变化

就增加了一句

MAYBE_INIT_TCACHE ();

然后就正常调用_int_free 了，那么我们就看_int_free 函数。其中对 Tcache 进行了讨论，本质就是调用 tcache_put 函数
检查了 chunk 大小是否应该放进 Tcache bin，检查了 bin 是不是满了。

#if USE_TCACHE
  {
    size_t tc_idx = csize2tidx (size);

    if (tcache
	&& tc_idx < mp_.tcache_bins
	&& tcache->counts[tc_idx] < mp_.tcache_count)
      {
	tcache_put (p, tc_idx);
	return;
      }
  }
#endif

但是更新后加入了新的检查：
现在，调用 _int_free 时，如果待释放 chunk 的 e->key 等于 tcache，就会遍历该大小对应的那条 Tcache bin 链表；如果发现将要释放的 chunk 已存在于链表中，将会报错 free(): double free detected in tcache 2。
但是进入报错的分支的条件之一就是 e->key == tcache，因此我们把 key 数值破坏就可以进行 double free 了。

#if USE_TCACHE
  {
    size_t tc_idx = csize2tidx (size);
    if (tcache != NULL && tc_idx < mp_.tcache_bins)
      {
        /* Check to see if it's already in the tcache.  */
        tcache_entry *e = (tcache_entry *) chunk2mem (p);

        /* This test succeeds on double free.  However, we don't 100%
           trust it (it also matches random payload data at a 1 in
           2^<size_t> chance), so verify it's not an unlikely
           coincidence before aborting.  */
        if (__glibc_unlikely (e->key == tcache))
          {
            tcache_entry *tmp;
            LIBC_PROBE (memory_tcache_double_free, 2, e, tc_idx);
            for (tmp = tcache->entries[tc_idx]; tmp; tmp = tmp->next)
              if (tmp == e)
                malloc_printerr ("free(): double free detected in tcache 2");
                /* If we get here, it was a coincidence.  We've wasted a
                   few cycles, but don't abort.  */
          }

        if (tcache->counts[tc_idx] < mp_.tcache_count)
          {
            tcache_put (p, tc_idx);
            return;
          }
      }
  }
#endif

# 其他 Tcache 的函数

#if USE_TCACHE
/* Set the tcache size limit to VALUE bytes when VALUE does not exceed
   MAX_TCACHE_SIZE, and recompute the number of bins in use.  Always
   returns 1, including when VALUE is out of range and nothing is
   changed.  */
static inline int
__always_inline
do_set_tcache_max (size_t value)
{
  /* VALUE is a size_t, so the ">= 0" half is always true; the upper
     bound is the effective check.  */
  if (value >= 0 && value <= MAX_TCACHE_SIZE)
    {
      LIBC_PROBE (memory_tunable_tcache_max_bytes, 2, value, mp_.tcache_max_bytes);
      mp_.tcache_max_bytes = value;
      /* Bin count is the index VALUE maps to, plus one.  */
      mp_.tcache_bins = csize2tidx (request2size(value)) + 1;
    }
  return 1;
}

/* Set mp_.tcache_count, the limit each bin's count is compared against
   before another chunk is cached, to VALUE.  No range check is applied
   here.  Always returns 1.  */
static inline int
__always_inline
do_set_tcache_count (size_t value)
{
  LIBC_PROBE (memory_tunable_tcache_count, 2, value, mp_.tcache_count);
  mp_.tcache_count = value;
  return 1;
}

/* Set mp_.tcache_unsorted_limit to VALUE.  No range check is applied
   here.  Always returns 1.  */
static inline int
__always_inline
do_set_tcache_unsorted_limit (size_t value)
{
  LIBC_PROBE (memory_tunable_tcache_unsorted_limit, 2, value, mp_.tcache_unsorted_limit);
  mp_.tcache_unsorted_limit = value;
  return 1;
}
#endif