Tuesday, June 1, 2010

Heap

This is the heap module, it's a simple Cheney style garbage collector. Again, no bit packing, and serialization of the heap just copies a tree like structure. The garbage collector is semi-exact, which has some nice properties with respect to static data, not sure I want that manner of inexactness on all places though. Guess I'll leave it at this for now.

I guess I should give some kind of rationale for this. Of course, you can go concurrent mark-and-sweep as shown by Erlang. But, my assumption is that at some point, for a sufficient number of cores -which may well be above sixteen- and more localized memory models, it just pays of to give each core its own memory and possibly implement light-weight threading by just swapping between different root nodes.

/**
 *  COPYRIGHT 2010, M.C.A. Devillers
 *
 *  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
 *  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
 *
 *  A simple library for a Cheney style heap. 
 *
 *  1. Every value is either a pointer or an integer of pointer size.
 *
 *  2. The heap holds nodes. Every node starts of with a two integer
 *     header. The first integer describes the size of the whole node, 
 *     the second integer is either 0, in which case all values in 
 *     that node are integers, or not, in which case all values in 
 *     that node are pointers to other nodes possibly in that heap.
 *
 *     Note: the Hi language runtime elaborates on the meaning of the
 *     second integer in the following manner: 
 *          0 - a constant, 1 - a record, otherwise - a thunk.
 *
 *  3. At the moment, the collector is semi-exact. Pointers which do
 *     not point into the heap are not chased. This makes it possible
 *     to use pointers to static data, and it makes it possible to
 *     use small integers as if they would be integers.
 *
 *     It implies serialization of static data is handled 
 *     different, it serializes everything and can only handle non-
 *     cyclic structures which is a severe drawback.
 *
 *  4. A heap may be GCed, in which case _a whole new heap is 
 *     allocated_ and the heap is copied to the new heap. This is to 
 *     facilitate multi-core processing of several coarse-grained
 *     threads without the need to keep unused to spaces around for 
 *     all heaps used. 
 *     Ordinary 'malloc' is used to allocate new heaps.
 *
 *  5. The heap size may be grown at will for special operations. 
 *     There are no facilities, as of yet, to shrink the heap size.
 *
 *  6. A heap may be serialized to a region in memory, in which case 
 *     only the first root node is copied. Unserialization inserts a 
 *     root node from a region in memory to a given heap.
 *
 *  A design which is neither very compact nor very fast, but minimal 
 *  and robust.
 */
#ifndef HEAP_H
#define HEAP_H

#include <stdio.h>
#include <stdint.h>
#include "../types/types.h"
 
#define ROOTS   1024

typedef struct {
    native_int      size;
    native_int*     low;
    native_int*     high;
    native_int*     free;
 
    native_int*     trigger_gc;
    native_int*     trigger_grow;

    native_int      root_free;
    native_int*     root[ROOTS];

    native_int*     memory;
} heap_t;

heap_t*     heap_create(native_int sz);
void*       heap_destroy(heap_t* hp);
native_int  heap_root_count(heap_t* hp);
native_int  heap_root_new(heap_t* hp);
void        heap_root_set(heap_t* hp, native_int n, native_int* r);
native_int* heap_root_get(heap_t* hp, native_int n);
native_int* heap_allocate(heap_t* hp, native_int sz);
heap_t*     heap_collect(heap_t* hp);
heap_t*     heap_try_collect(heap_t* hp);
heap_t*     heap_reserve(heap_t* hp, native_int sz);
native_int  heap_serialize(heap_t* hp, void* p, native_int sz);
native_int* heap_unserialize(heap_t* hp, void* p, native_int sz);
native_int  heap_size(heap_t* hp);
native_int  heap_used(heap_t* hp);
native_int  heap_free(heap_t* hp);
void        heap_info(FILE* f, heap_t* hp);
void        heap_debug(FILE* f, heap_t* hp);
#endif // HEAP_H
/**
 *  COPYRIGHT 2008, M.C.A. Devillers
 *
 *  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
 *  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
 */
#include <stdlib.h>
#include <string.h>
#include <dlfcn.h>
#include "../assert/assert.h"
#include "heap.h"
#define BITS_SET(b0,b1)     ((b0) | (b1))
#define BITS_UNSET(b0,b1)   ((b0) & (~(b1)))
#define BITS_TEST(b0,b1)    (((b0) & (b1)) != 0)
#define BITS_FLIP(b0,b1)    ((b0) (b1))
#define BIT_SET(n,b)        BITS_SET(n, 1<<b)
#define BIT_UNSET(n,b)      BITS_UNSET(n, 1<<b)
#define BIT_TEST(n,b)       BITS_TEST(n, 1<<b)
#define BIT_FLIP(n,b)       BITS_FLIP(n, 1<<b)

#define BELOW(p,q)          (((void*) p) < ((void*) q))
#define BELOWEQ(p,q)        ((void*) p <= (void*) q)
#define IN_HEAP(p, l, h) \
    ( ((void*) p >= (void*) l) && ((void*) p < (void*) h) )

#define DIFFERENCE(p,q)     ((native_int)((void*)p - (void*) q))
#define INCREMENT(p,n)      ((native_int*)((void*)p + (int) n))
#define DECREMENT(p,n)      ((native_int*)((void*)p - (int) n))

#define ADVANCE(p,n)        &(((native_int*)p)[(int)n])
#define RETREAT(p,n)        &(((native_int*)p)[0-(int)n])

#define HEAP_ALLIGNMENT     512
#define HEAP_MINIMAL_GROW   2048
#define HEAP_MINIMAL_FREE   1024
#define HEAP_MINIMAL_SIZE   4096

#define SMALL_INTEGER       64*1024
#define IS_SMALL_INTEGER(p) BELOW(p, (SMALL_INTEGER))


/** Allocate alligned memory.
 *
 *  /param m    the size to be allocared
 *  /param n    the alignement (some power of two, preferably cache-size)
 *  /return     a chunk of memory
 
 */
void* malloc_alligned(size_t m, int n) {
    TRACE;
    void* p = 0;
    if (posix_memalign(&p, n, m) == 0) {
        ASSERT(!IS_SMALL_INTEGER(p));
        return p;
    } else {
        return 0;
    }
} 

/** Create a heap.
 *
 *  The heap size is _not_ the number of bytes allocated, but the
 *  total number of bytes _including_ the header size.
 *
 *  Exits if a new heap cannot be allocated.
 *
 *  /param sz   the heap size
 *  /return     a heap
 */
 
heap_t* heap_create(native_int sz) {
    TRACE;
    ASSERT(sz >= HEAP_MINIMAL_SIZE);

    heap_t* r = (heap_t*) malloc_alligned(sz, HEAP_ALLIGNMENT);
    if (r == 0) {
        fprintf(stderr,
            "HEAP_PANIC[0]: failed to allocate a heap of size %u\n",
            (unsigned int) sz);
        exit(1);
    }

    r->size = sz;
    r->low  = (native_int*) &(r->memory);
    r->high = INCREMENT(r, sz);
    r->free = r->low;

    r->root_free = 0;
 
    DEBUG(native_int i;)
    DEBUG(for (i = 0; i < ROOTS; i++) r->root[i] = 0;)

    r->trigger_gc   = DECREMENT(r->high, HEAP_MINIMAL_FREE);
    r->trigger_grow = DECREMENT(r->high, HEAP_MINIMAL_GROW);

    DEBUG(native_int* p;)
    DEBUG(for (p = r->low; p < r->high; p++) *p = 0;)

    return r;
}

/** Destroy a heap.
 *
 *  /param hp   a heap
 */
void* heap_destroy(heap_t* hp) {
    TRACE;

    free(hp);
    return 0;
}

/** Give the number of roots in the heap.
 *
 *  /param hp   a heap
 *  /return     the number of roots
 */
 
native_int heap_root_count(heap_t* hp) {
    TRACE;

    return hp->root_free;
}

/** Allocate a new root slot in the heap.
 *
 *  The number of roots is a fixed number, see ROOTS.
 *
 *  Exits if there are no more roots available.
 *
 *  /param hp   a heap
 *  /return     a new root slot
 */
native_int heap_root_new(heap_t* r) {
    TRACE;
 
    native_int free = r->root_free;
    r->root_free++;
    if (r->root_free > ROOTS) {
        fprintf(stderr,
            "HEAP_PANIC[1]: out of roots %d\n",
            ROOTS);
        exit(1);
    }
 
    return free;
}

/** Set a root node to a pointer (in the heap).
 *
 *  /param hp   a heap
 *  /param n    a root slot
 *  /param r    a pointer to a structure
 */
void heap_root_set(heap_t* hp, native_int n, native_int* r) {
    TRACE;
    ASSERT(n < ROOTS);

    hp->root[n] = r;
}
 
/** Get a root node to a pointer (in the heap).
 *
 *  /param hp   a heap
 *  /param n    a root slot
 *  /return     a pointer to a structure
 */
 
native_int* heap_root_get(heap_t* hp, native_int n) {
    TRACE;
    ASSERT(n < ROOTS);

    return hp->root[n];
}

/** Allocate a node in the heap of given size.
 *
 *  Nodes may, but should not assumed, to be blanked. 
 *  Nodes may, but should not be assumed, to have an initialized 
 *  header field.
 *
 *  /param hp   a heap
 *  /param sz   the size requested
 *  /return     a pointer to a new node
 */
 
native_int* heap_allocate(heap_t* hp, native_int sz) {
    TRACE;

    native_int* p = hp->free;
    hp->free = ADVANCE(hp->free, sz);

    DEBUG(p[0] = sz;)
    DEBUG(native_int i;)
 
    DEBUG(for (i = 1; i < sz; i++) p[i] = 0;)

    return p; 
}
 
/** Copy nodes from one heap to another heap.
 *
 *  All nodes refered to by root nodes in the from heap are
 *  copied to the to heap.
 *
 *  Unsafe, the caller should make sure the to space holds
 *  enough room.
 *
 * /param from  the source heap
 * /param to    the destination heap
 * /retrun      the destination heap
 */
 
heap_t* heap_copy(heap_t* from, heap_t* to) {
    TRACE;

    native_int* copy_low;
    native_int* copy_high;
 
    native_int  i,j;
    native_int* p0;
    native_int* p1;

    copy_low  = to->free;
    copy_high = to->free;

    // copy the roots to the to-space
    for (i = 0; i < from->root_free; i++) {
        p0 = from->root[i];
        if (IN_HEAP(p0, from->low, from->high)) {
            p1 = (native_int*) p0[0];
            if (IN_HEAP(p1, to->low, to->high)) {
                to->root[to->root_free + i] = p1;
            } else {
                for (j = 0; j < p0[0]; j++) {
                    copy_high[j] = p0[j];
                }
 
                p0[0] = (native_int) copy_high;
                to->root[to->root_free + i] = copy_high;
                copy_high = ADVANCE(copy_high, copy_high[0]);
            }
        }
    }
    to->root_free += from->root_free;

    // copy all references to the to-space
    while (copy_low != copy_high)  {
        if (copy_low[1] != 0) {
            for (i = 2; i < copy_low[0]; i++) {
                p0 = (native_int*) copy_low[i];
                if (IN_HEAP(p0, from->low, from->high)) {
                    p1 = (native_int*) p0[0];
                    if (IN_HEAP(p1, to->low, to->high)) {
                        copy_low[i] = (native_int) p1;
                    } else {
                        for (j = 0; j < p0[0]; j++) copy_high[j] = p0[j];
                        copy_low[i] = (native_int) copy_high;
                        p0[0] = (native_int) copy_high;
                        copy_high= ADVANCE(copy_high, copy_high[0]);
                    }
                }
            }
        }
        copy_low = ADVANCE(copy_low, copy_low[0]);
    }
    to->free = copy_low;
    return to;
}
 
/** Resize a given heap to a given size by a copy.
 *
 *  /param hp   a heap
 *  /param sz   the requested size
 *  /return     a copy of the heap of given size
 */
heap_t* heap_resize(heap_t* hp, native_int sz) {
    TRACE;
    heap_t* to;
    to = heap_create(sz);
    to = heap_copy(hp, to);
    heap_destroy(hp);
    return to;
}

/** Collect (copy) a heap.
 *
 *  /param hp   a heap
 *  /return     a garbage collected copy of the heap
 */
heap_t* heap_collect(heap_t* r) {
    TRACE;

    heap_t* s;
    s = r;
    s = heap_resize(s, s->size);
    if (BELOW(s->trigger_grow, s->free)) {
        s = heap_resize(s, s->size + s->size);
    }
    return s;
}

/** Collect (copy) a heap if it runs out of space.
 *
 *  /param hp   a heap
 *  /return     a collected heap, or the original
 */
heap_t* heap_try_collect(heap_t* r) {
    TRACE;
    heap_t* s;
    s = r;
    if (BELOW(s->trigger_gc, s->free)) {
        fprintf(stdout, "HEAP!\n"); // XXX
        s = heap_collect(s);
    }
    return s;
}

/** Reserve space in a heap.
 *
 *  /param hp   a heap
 *  /param sz   the requested size
 *  /return     a heap with at least sz free bytes
 */
heap_t* heap_reserve(heap_t* r, native_int sz) {
    TRACE;
 
    heap_t* s;
    s = r;
    if ((void*)heap_free(s) <= (void*)sz) {
        s = heap_collect(s);
    }
    while ((void*)heap_free(s) <= (void*)sz) {
        s = heap_resize(s, s->size + s->size);
    }

    return s;
}
 
/** Copy everything reachable (also outside the heap) to another
 *  heap.
 *
 *  Note: Doesn't handle cyclic structures.
 *
 *  /param from a source heap
 *  /param to   a destination heap
 *  /return     the destination heap
 */
heap_t* heap_copy_deep(heap_t* from, heap_t* to) {
    TRACE;

    native_int* copy_low;
    native_int* copy_high;
    native_int  i,j;
    native_int* p0;
    native_int* p1; 
    copy_low  = to->low;
    copy_high = to->low;

    ASSERT(from->root_free == 1);
    to->root_free = from->root_free;

    // copy the root to the to-space
    p0 = from->root[0];
    for (j = 0; j < p0[0]; j++) {
        copy_high[j] = p0[j];
    }
    to->root[0] = copy_high;
    copy_high = ADVANCE(copy_high, copy_high[0]);

    // copy all references to the to-space
    while (copy_low != copy_high)  {
        if (copy_low[1] != 0) {
            for (i = 2; i < copy_low[0]; i++) {
                p0 = (native_int*) copy_low[i];
                if (!IS_SMALL_INTEGER(p0)) {
                    for (j = 0; j < p0[0]; j++) copy_high[j] = p0[j];
                    copy_low[i] = (native_int) copy_high;
                    copy_high= ADVANCE(copy_high, copy_high[0]);
                }
            }
        }
        copy_low = ADVANCE(copy_low, copy_low[0]);
    }

    to->free = copy_low;

    return to;
}

/** Serialize a heap to memory.
 *
 *  /param hp   a heap
 *  /param p    a piece of memory
 *  /param sz   the size of the memory in bytes
 *  /return     the number of bytes used
 */
native_int heap_serialize(heap_t* t, void* p, native_int sz) {
    TRACE;

    // coerce p to a heap
    heap_t* r = (heap_t*) p;
    r->size = sz;
 
    r->low  = (native_int*) &(r->memory);
    r->high = INCREMENT(r, sz);
 
    r->free = r->low;
    r->root_free = 0;
    r->trigger_gc   = DECREMENT(r->high, HEAP_MINIMAL_FREE);
    r->trigger_grow = DECREMENT(r->high, HEAP_MINIMAL_GROW);
    // copy the first root to p
    native_int tmp;
    tmp = t->root_free;
    t->root_free = 1;
    heap_copy_deep(t, r);
    t->root_free = tmp;

    // return the size of the copied region
    return (native_int) (DIFFERENCE(r->free, r));
}

/** Unserialize memory to a heap.
 *
 *  /param hp   a heap
 *  /param p    a piece of memory
 *  /param sz   the size of the memory in bytes
 *  /return     the root of the structure copied
 */
native_int* heap_unserialize(heap_t* t, void* p, native_int sz) {
    TRACE;

    native_int i;
    heap_t* r = (heap_t*) p;

    // get the pointer offset...
    native_int offset = DIFFERENCE((&(r->memory)), r->root[0]);

    // alligning code
    r->low  = INCREMENT(r->low, offset);
    r->high = INCREMENT(r->high, offset);
    r->free = INCREMENT(r->free, offset);
    
    for (i = 0; i < ROOTS; i++) {
        r->root[i] = INCREMENT(r->root[i], offset);
    }
    native_int* low  = r->low;
    native_int* high = r->free;
    while (low != high)  {
        if (low[1] != 0) {
            for (i = 2; i < low[0]; i++) {
                if (!IS_SMALL_INTEGER(low[i])) {
                    low[i] = (native_int) 
                        INCREMENT((native_int*)low[i], offset);
                }
            }
        }
        low = ADVANCE(low, low[0]);
    }
    // copying code
    heap_copy(r, t);
    t->root_free--;
    return t->root[t->root_free];
}

/** The size, total number of integers/pointers, of a heap.
 *
 *  /param hp   a heap
 *  /return     the size of the heap
 */
native_int heap_size(heap_t* hp) {
    return (native_int)
        (((void*) hp->high - (void*) hp->low)/sizeof(native_int));
}

/** The free space, total number of unallocated integers/pointers, of a heap.
 *
 *  /param hp   a heap
 *  /return     the size of the free space of the heap
 */
native_int heap_free(heap_t* hp) {
    return (native_int)
        (((void*) hp->high - (void*) hp->free)/sizeof(native_int));
}

/** The used space, total number of allocated integers/pointers, of a heap.
 *
 *  /param hp   a heap
 *  /return     the size of the used space of the heap
 */
native_int heap_used(heap_t* hp) {
    return (native_int)
        (((void*) hp->free - (void*) hp->low)/sizeof(native_int));
}

/** Print some information to a file pointer.
 *
 *  /param f    a file pointer
 *  /param hp   a heap
 */
void heap_info(FILE* f, heap_t* hp) {
    fprintf(f, "**********************************************************************\n");
    fprintf(f, "GARBAGE COLLECTION INFORMATION\n");
    fprintf(f, "0-heap size           : %ld\n",   hp->size);
    fprintf(f, "0-heap low            : %p\n",    hp->low);
    fprintf(f, "0-heap high           : %p\n",    hp->high);
    fprintf(f, "0-heap free           : %p\n",    hp->free);
    fprintf(f, "0-heap roots          : %d\n",    hp->root_free);
    fprintf(f, "0-heap gc trigger     : %p\n",    hp->trigger_gc);
    fprintf(f, "0-heap grow trigger   : %p\n\n",  hp->trigger_grow);
    fprintf(f, "1-heap size           : %ld\n",   heap_size(hp));
    fprintf(f, "1-heap used           : %ld\n",   heap_used(hp));
    fprintf(f, "1-heap free           : %ld\n\n", heap_free(hp));
    fprintf(f, "2-heap minimal free   : %d\n",    HEAP_MINIMAL_FREE);
    fprintf(f, "2-heap grow           : %d\n\n",  HEAP_MINIMAL_GROW);
    fprintf(f, "**********************************************************************\n");
}

// just for debugging purposes, loops on cyclic structures
void heap_print_node(FILE* f, native_int* n) {
    unsigned int i;
    fprintf(f, "------------------------------------\n");
    fprintf(f, "node[%p].size : %d\n", (void*) n, (unsigned int) n[0]);
    fprintf(f, "node[%p].tag  : %d\n", (void*) n, (unsigned int) n[1]);
    if (n[1] == 0) {
        for (i = 2; i < n[0]; i++) {
            fprintf(f, "node[%p].[%d]  : %d\n", (void*) n, i, (unsigned int) n[i]);
        }
    } else {
        for (i = 2; i < n[0]; i++) {
            fprintf(f, "node[%p].[%d]  : %p\n", (void*) n, i, (void*) n[i]);
        }
        for (i = 2; i < n[0]; i++) {
            heap_print_node(f, (native_int*) n[i]);
        }
    }
}

// just for debugging purposes
void heap_debug(FILE* f, heap_t* hp) {
    native_int i;
    heap_info(f, hp);
    for (i = 0; i < hp->root_free; i++) {
        fprintf(f, "ROOT %d:\n", i);
        heap_print_node(f, hp->root[i]);
        fprintf(f, "**********************************************************************\n");
    }
}

No comments:

Post a Comment