// Copyright (C)  2000 Intel Corporation.  All rights reserved.
//
// $Header: /usr/development/orp/orp/arch/ia32/ia32_o3_jit/gc_eh_support.cpp,v 1.2 2001/08/13 09:54:55 xhshi Exp $
//



#include "defines.h"
#include "gc_eh_support.h"
#include "flow_graph.h"
#include "bit_vector_group.h"
#include "expression.h"
#include "jit_runtime_support.h"
#include "bitstream.h"
#include "pldi.h"

#undef TRACE_O3
//#define DEBUGGING_OUTPUT
extern JIT_Handle O3_Jit_Handle;
static unsigned nhits, nmisses, uhits, umisses, thits, tmisses;

//----------------------------------------------------------------------------------
// If USE_DISASSEMBLER_FOR_INST_LENGTH is set, we use the x86 disassembler to
// compute the length of an individual instruction, rather than storing it in
// the method_info.  The advantage is a 15-20% reduction in the method_info
// size.  The disadvantage is slower encoding and decoding, due to the slow
// speed of the current disassembler.
//#define USE_DISASSEMBLER_FOR_INST_LENGTH // slow
//----------------------------------------------------------------------------------

//----------------------------------------------------------------------------------
// If USE_BB_INDEX is set, we embed an index into the method_info that allows us
// to jump directly to the record for a particular basic block, rather than having
// to scan through all the previous entries to get to the desired one.  The
// advantage is speed, and the disadvantage is the small amount of additional
// space required.
#define USE_BB_INDEX  // fast
//----------------------------------------------------------------------------------

//----------------------------------------------------------------------------------
// If DISABLE_CACHING is set, we disable the quick lookup cache for unwinding and
// esp normalizing.  Since we use a fast hash table for the lookup, it's hard to
// imagine an advantage to disabling the cache.
//#define DISABLE_CACHING  // slow
//----------------------------------------------------------------------------------


#if defined(USE_DISASSEMBLER_FOR_INST_LENGTH) || defined(PLDI)
#include "..\dump\disasm_intf.h"
#endif // USE_DISASSEMBLER_FOR_INST_LENGTH

/*
    A guide to the method_info bit array.  Hopefully I'll keep it up to date
    as I make changes.

  Fixed information:
    bbcount_bits:code_size_bits_bits
    bbcount:bbcount_bits
    max_inst_length_bits:code_size_bits_bits
    n_spill_words:code_size_bits  [code_size_bits is computed externally]
    0:1                              > if n_callee==0
    1:1                              \
    n_callee-1:2                     |
    first_push_offset:3              | if n_callee>0
    first_pop_offset:code_size_bits  |
    return_offset:code_size_bits     /
    this_in_reg:1   \ if synchronized nonstatic method
    this_offset:32  /
    gc_unsafe_nonzero:1
      gc_unsafe_size:code_size_bits
      gc_unsafe_array:gc_unsafe_size*code_size_bits
    eip_offset_array:code_size_bits*(bbcount-1)
//#ifdef USE_BB_INDEX
    mi_offset_array:mi_offset_bits*(bbcount-1)
//#endif // USE_BB_INDEX
  Unified records:
    num_inst_records:code_size_bits
    adj_ever_nonzero:1  [0 if num_inst_records==0 and initial esp adjustment is 0]
    esp_adj_bits:5                \ if adj_ever_nonzero==1
    initial_esp_adj:esp_adj_bits  /
    init_reg_state:8  [esp bit set means bv_size==0]
    bv_size_bits:5                  \
    n_lref_entry:bv_size_bits       | if bv_size>0
    bv_size:bv_size_bits            /
    esp_offset_bits:5  > if bv_size>0
    esp_offset_array:bv_size*esp_offset_bits  [esp offsets are right-shifted 2 bits]
    Instruction records:  [num_inst_records total]
      inst_length:code_size_bits
      reg_change:1
        which_reg:3                             \ if reg_change==1
          reg_state:8  > if which_reg==esp_reg  /
      stk_change:1
        idx_to_toggle:bv_size_bits  > if stk_change==1
      esp_change:1
        was_push:1
          ref_pushed:1  > if was_push==1
          was_call:1  > if was_push==0
          esp_adj:esp_adj_bits  > if was_call==0



      was_push:1
      was_call:1                                                      \ if was_push==0
      esp_adj:esp_adj_bits  [right-shifted 2 bits]  > if was_call==0  /
      stk_change_reg_nochange:1
      stk_change_reg_change:1  > if stk_change_reg_nochange==0
      which_reg:3                           \ if reg_change
      reg_state:8  > if which_reg==esp_reg  /
      not_single_stk:1
      idx_to_toggle:bv_size_bits  > if not_single_stk==0   \
      is_call:1                                            | if 
      idx1:bv_size_bits  \ if is_call==0                   |
      idx2:bv_size_bits  /                                 /

    Note: eip_array contains the eip offset for the start of each basic block.
    GC_array contains offsets into the method_info bit array for the GC records.
    ESP_array does the same for ESP records.
    XXX-Note: search for all the ":5" entries and make sure they're in the
    range 0:31 and not 1:32.


  */

// Diagnostic helper: reports a suspicious JSR-related liveness state for the
// given basic block.  Prints the block label and then one line per flow graph
// reached by following calling_fg links from the block's own flow graph.
// Compiles to an empty function unless ORP_NT is defined.
static void print_jsr_problem_info(Cfg_Node *node)
{
#ifdef ORP_NT
    cout << "Bogus JSR problem in BB " << node->label << endl;
    // Start at the flow graph that owns the node and follow the calling_fg
    // chain until it runs out.
    for (Flow_Graph *cur = node->flowgraph; cur != NULL; cur = cur->calling_fg)
    {
        Method_Handle method = cur->m_handle();
        cout << "  Method " << class_get_name(method_get_class(method))
            << "." << method_get_name(method) << method_get_descriptor(method) << endl;
    }
#endif
}

// Opens the GC/ESP map for a new basic block.
//   node           - the basic block; its liveness info seeds the map.
//   emitter_offset - code offset at which the block starts.
//   n_words_pushed - number of 4-byte words on the stack at block entry; it
//                    becomes the block's initial esp adjustment in bytes.
//   frame          - used to translate home locations into frame offsets.
// The new GC_Map_BB is linked in just before bbhead, i.e. at the tail of the
// list, where add_inst() and end_bb() look for the current block.
void GC_Map::start_bb(Cfg_Node *node, unsigned emitter_offset, int n_words_pushed, Frame &frame)
{
    GC_Map_BB *bb = new(mem) GC_Map_BB();
    bb->insert_before(&bbhead);

    // Both the fixed block start and the running "last recorded" offset begin
    // at the block's first byte; no instruction records exist yet.
    bb->initial_emitter_offset = emitter_offset;
    bb->current_emitter_offset = emitter_offset;
    bb->gc_esp_rec_capacity = 0;
    bb->gc_esp_rec_size = bb->gc_esp_rec_capacity;

    ////////////////////////////////
    // GC information
    ////////////////////////////////

    // Registers holding live references at block entry.
    unsigned char entry_regs = node->live->live_refs_regs();
    bb->gc_initial_live_ref_regs = entry_regs;
    bb->gc_current_live_ref_regs = entry_regs;

    // Stack locations holding live references at block entry.  The offset
    // array is sized exactly; get_index() grows it later as needed.
    // XXX- Surely this can be done more efficiently.
    unsigned n_live = node->live->num_live_refs();
    bb->gc_num_initial_live_ref_stack = n_live;
    bb->gc_live_ref_esp_offsets = (int *) mem.alloc(n_live * sizeof(int));
    bb->gc_live_ref_esp_offsets_size = n_live;
    bb->gc_live_ref_esp_offsets_capacity = n_live;

    unsigned *live_vars = _tmp_lr_array;
    node->live->fill_in_lr_array(live_vars);
    for (unsigned slot = 0; slot < n_live; slot++)
    {
        unsigned var = live_vars[slot];
        unsigned home_loc = fg->bvpmap->frame_offsets[var];
        assert(home_loc != 0x23456789);
        if (fg->bvpmap->use_var_offset[var])
        {
            bb->gc_live_ref_esp_offsets[slot] = frame.var_offset(home_loc);
        }
        else
        {
            // Spill slots are numbered in the opposite direction.
            bb->gc_live_ref_esp_offsets[slot] =
                frame.spill_offset(frame.n_spill - home_loc - 1);
        }
    }
    // To do: record the live unknowns (both registers and stack locations) at BB entry.

    ////////////////////////////////
    // ESP information
    ////////////////////////////////

    // ESP adjustment at block entry, in bytes.
    bb->esp_adjustment_bytes = bb->esp_last_adjustment = n_words_pushed * 4;

    ////////////////////////////////
    // JSR information
    ////////////////////////////////

#ifdef DEBUGGING_OUTPUT
    cout << "BB#" << node->label << ": emitter offset " << emitter_offset
         << ", initial esp adjustment " << bb->esp_adjustment_bytes << endl;
#endif // DEBUGGING_OUTPUT
}

// Returns the reference operand whose live range the instruction ends at
// position `bit` of inst->live_ranges_ended, or NULL if there is none.
// Even bit positions name operands (bit 0 is the destination, bit 2k is
// source k-1); odd positions can only describe offsets.
static Operand *which_ref_opnd_lr_ends(Inst *inst, unsigned bit)
{
    const bool range_ends_here = (inst->live_ranges_ended & (1u << bit)) != 0;
    if (!range_ends_here)
        return NULL;

    // Untyped instruction: asserted to be a GCTrack-to-GCTrack assignment
    // whose only candidate is source 0.  The assignment's real_type decides
    // whether that source counts as a reference.
    if (inst->exp != NULL && inst->type() == JIT_TYPE_UNKNOWN)
    {
        assert(inst->is_assignment());
        assert(inst->dst()->kind == Operand::GCTrack);
        assert(inst->src(0)->kind == Operand::GCTrack);
        assert(bit == 2);
        Assign_Inst *copy = (Assign_Inst *)inst;
        if (copy->real_type == JIT_TYPE_CLASS)
            return inst->src(0);
        if (copy->real_type == JIT_TYPE_UNKNOWN)
            assert(0);
        return NULL;
    }

    // An odd-numbered bit position can only be an offset, not a reference.
    if ((bit & 1u) != 0)
        return NULL;

    const unsigned slot = bit >> 1;
    if (slot == 0) // destination operand
        return inst->dst()->base();

    Operand *src = inst->src(slot - 1);
    assert(src != NULL);
    if (src->is_status_flags())
        return NULL;

    if (!src->is_reg())
    {
        // Non-register operand: the candidate is its base, unless that base
        // is an address (vtable reference).
        if (src->base()->type == JIT_TYPE_ADDR)
            return NULL;
        return src->base();
    }

    const bool holds_ref =
        (src->type == JIT_TYPE_CLASS || src->type == JIT_TYPE_ARRAY);
    if (!holds_ref)
    {
        assert(inst->is_call() || inst->exp == NULL || inst->is_compare() ||
            (inst->type() != JIT_TYPE_CLASS && inst->type() != JIT_TYPE_ARRAY));
        return NULL;
    }
    assert(inst->type() == JIT_TYPE_CLASS || inst->type() == JIT_TYPE_ARRAY ||
        (inst->is_gc_unsafe() && inst->can_use_lea()));
    return src;
}

// Looks `value` up in array[0..size) and returns its position.  If it is not
// present, the value is appended (the array is grown through RESIZE_ARRAY
// first) and the position of the new entry is returned.  `array`, `size` and
// `capacity` are updated in place.
static unsigned get_index(int *&array, int &size, int &capacity, int value, Mem_Manager &mem)
{
    for (int pos = 0; pos < size; pos++)
    {
        if (array[pos] == value)
            return (unsigned) pos;
    }

    // Not found: make room and append at the end.
    RESIZE_ARRAY(int, array, capacity, size, 4, mem);
    const unsigned appended_at = (unsigned) size;
    array[size] = value;
    size ++;
    return appended_at;
}

static unsigned get_index(int *&array, int &size, int &capacity, Operand *opnd, Frame &frame, Mem_Manager &mem)
{
    int value;
    if (opnd->is_arg() ||
        (opnd->kind == Operand::GCTrack && ((GCTrack_Operand *)opnd)->use_arg_home_loc()))
        value = frame.var_offset(((GCTrack_Operand *)opnd)->home_location());
    else
        value = frame.spill_offset(frame.n_spill - ((GCTrack_Operand *)opnd)->home_location() - 1);
    return get_index(array, size, capacity, value, mem);
}

// Works out whether `inst` changes the set of live references (in registers,
// in stack home locations, or on top of the stack) and, if so, fills in the
// GC part of the pending record gc_esp_records[gc_esp_rec_size].
//   inst               - the instruction just emitted.
//   emitter_offset     - stored in current_emitter_offset when a change is
//                        recorded.
//   mem, frame         - passed to get_index() to map a stack operand to an
//                        index into gc_live_ref_esp_offsets.
//   map                - its largest_inst_length is raised if the pending
//                        record's inst_length exceeds it.
//   created_new_record - set to true when a liveness change is recorded; left
//                        untouched otherwise.
// This function does not advance gc_esp_rec_size itself.
void GC_Map_BB::add_inst_gc(Inst *inst, unsigned emitter_offset, Mem_Manager &mem, Frame &frame,
                            GC_Map *map, bool &created_new_record)
{
    // We first need to decide whether this instruction changes any live ranges.
    // It generally does if reference_change_str() returns "L" on any bits, or if
    // the LHS is a GCTrack and the LHS type is JIT_TYPE_CLASS.  Exceptions:
    //   eax =.L [eax+8]    eax's live range both ends and starts.
    //   push               outarg live range starts.
    //   call               all outarg live ranges end, as do scratch registers.
    // (probably checking esp_adjustment is better than checking call_inst,
    // because of the caller-pop multianewarray.)
    bool top_of_stack_live = false;
    bool eax_livened_by_call = false;
    bool call_with_no_ref_args = false;
    // Working copy of the register bitmap; committed only if something changed.
    unsigned char current_live_regs = gc_current_live_ref_regs;
    Operand *stack_opnd_killed = NULL;  // at most one stack operand's live range ends
    Operand *stack_opnd_livened = NULL;
    Operand *opnd_livened = NULL;

    if (inst->is_push())
    {
        // Pushing a class or array value puts a live reference on top of the stack.
        O3_Jit_Type push_type = inst->src(0)->type;
        if (push_type == JIT_TYPE_CLASS || push_type == JIT_TYPE_ARRAY)
            top_of_stack_live = true;
    }
    else if (inst->is_call())
    {
        // A call instruction cannot end the live range of a JIT_TYPE_CLASS
        // home location.  And inst->srcs(0) cannot contain a JIT_TYPE_CLASS
        // register.
        current_live_regs &= ~ALL_X86_CALLER_REGS;
        Call_Inst *cinst = (Call_Inst *) inst;
        Inst *ret = cinst->get_ret();
        // The call makes eax live when its return instruction has class or
        // array type.
        eax_livened_by_call =
            (ret != NULL &&
            (ret->type() == JIT_TYPE_CLASS || ret->type() == JIT_TYPE_ARRAY));
        // Assume no reference arguments until one is found.
        call_with_no_ref_args = true;
        unsigned i;
        for (i=0; i<cinst->n_args(); i++)
        {
            O3_Jit_Type ty = cinst->get_arg(i)->type();
            if (ty == JIT_TYPE_CLASS || ty == JIT_TYPE_ARRAY)
            {
                call_with_no_ref_args = false;
                break;
            }
        }
    }
    else
    {
#if 0  // This is a nice optimization, but let's put debugging output into one place for now.
        if ((inst->live_ranges_ended & 0x55) == 0 && inst->type() != JIT_TYPE_CLASS)
            return;
#endif
        // Any other instruction starts a live range when it writes a GCTrack
        // destination of class type.
        // NOTE(review): only JIT_TYPE_CLASS is tested here, whereas the push
        // and call cases also accept JIT_TYPE_ARRAY - confirm this is intended.
        Operand *dst = inst->dst();
        if (dst != NULL && dst->kind == Operand::GCTrack && dst->type == JIT_TYPE_CLASS)
            opnd_livened = dst;
    }

    // Apply the liveness changes.
    // Only even bit positions are visited; which_ref_opnd_lr_ends() treats odd
    // positions as offsets that never name a reference.
    unsigned bit;
    for (bit=0; bit<2*(MAX_SRCS+1); bit+=2)
    {
        Operand *which_opnd = which_ref_opnd_lr_ends(inst, bit);
        if (which_opnd != NULL)
        {
            assert(which_opnd->kind == Operand::GCTrack);
            // bv_position() below n_reg is handled as a register bit, anything
            // else as a stack home location.
            unsigned bvp = which_opnd->bv_position();
            if (bvp < n_reg)
                current_live_regs &= ~(1u << bvp);
            else
            {
                assert(stack_opnd_killed == NULL || stack_opnd_killed == which_opnd);
                stack_opnd_killed = which_opnd;
            }
        }
    }
    if (opnd_livened != NULL)
    {
        unsigned bvp = opnd_livened->bv_position();
        if (bvp < n_reg)
            current_live_regs |= (1u << bvp);
        else
            stack_opnd_livened = opnd_livened;
    }
    if (eax_livened_by_call)
        current_live_regs |= (1u << eax_reg);

    // The same stack operand both dying and being redefined is no net change.
    if (stack_opnd_killed == stack_opnd_livened)
        stack_opnd_killed = stack_opnd_livened = NULL;

    // Nothing to record if the register bitmap is unchanged, no stack operand
    // changed (the two pointers can only be equal here when both are NULL), no
    // reference was pushed, and the instruction is not a call with reference
    // arguments.
    if (current_live_regs == gc_current_live_ref_regs &&
        stack_opnd_killed == stack_opnd_livened &&
        !top_of_stack_live &&
        (!inst->is_call() || call_with_no_ref_args))
    {
#ifdef TRACE_O3_1
        cout << "Emitter offset " << emitter_offset << ": no liveness changes." << endl;
#endif // TRACE_O3_1
        return;
    }
    gc_current_live_ref_regs = current_live_regs;
#ifdef TRACE_O3_1
    cout << "Emitter offset " << emitter_offset << ": register refs";
    unsigned regno;
    for (regno=0; regno<n_reg; regno++)
    {
        if (gc_current_live_ref_regs & (1u << regno))
            cout << " " << X86_Reg_Str[regno];
    }
    if (gc_current_live_ref_regs == 0)
        cout << " [none]";
    cout << endl;
#endif // TRACE_O3_1

    // Commit the change into the pending record.
    created_new_record = true;
    if (map->largest_inst_length < gc_esp_records[gc_esp_rec_size].inst_length)
        map->largest_inst_length = gc_esp_records[gc_esp_rec_size].inst_length;
    current_emitter_offset = emitter_offset;
    gc_esp_records[gc_esp_rec_size].live_ref_regs = gc_current_live_ref_regs;
    gc_esp_records[gc_esp_rec_size].top_of_stack_live = (char)(top_of_stack_live);
    if (stack_opnd_killed != NULL)
    {
        // A stack home location stops holding a live reference.
        gc_esp_records[gc_esp_rec_size].live_ref_change =
            (unsigned char)lr_ends;
        gc_esp_records[gc_esp_rec_size].stack_change_index =
            get_index(gc_live_ref_esp_offsets,
                      gc_live_ref_esp_offsets_size,
                      gc_live_ref_esp_offsets_capacity,
                      stack_opnd_killed,
                      frame,
                      mem);
#ifdef TRACE_O3_1
        cout << "Emitter offset " << emitter_offset << ": var "
             << stack_opnd_killed->bv_position() << " killed." << endl;
#endif // TRACE_O3_1
    }
    else if (stack_opnd_livened != NULL)
    {
        // A stack home location starts holding a live reference.
        assert(!top_of_stack_live);
        gc_esp_records[gc_esp_rec_size].live_ref_change =
            (unsigned char)lr_starts;
        gc_esp_records[gc_esp_rec_size].stack_change_index =
            get_index(gc_live_ref_esp_offsets,
                      gc_live_ref_esp_offsets_size,
                      gc_live_ref_esp_offsets_capacity,
                      stack_opnd_livened,
                      frame,
                      mem);
#ifdef TRACE_O3_1
        cout << "Emitter offset " << emitter_offset << ": var "
             << stack_opnd_livened->bv_position() << " livened." << endl;
#endif // TRACE_O3_1
    }
    else if (top_of_stack_live)
    {
        // Pushed reference: live_ref_change is not written on this path, so it
        // keeps whatever value the record already had.
        assert(stack_opnd_livened == NULL);
        assert(stack_opnd_killed == NULL);
#ifdef TRACE_O3_1
        cout << "Emitter offset " << emitter_offset << ": live reference pushed onto stack." << endl;
#endif // TRACE_O3_1
    }
    else
        // Register-only change (or a call with reference arguments).
        gc_esp_records[gc_esp_rec_size].live_ref_change =
        (unsigned char)none;

}

// Records the GC and ESP effects of one emitted instruction in the map of the
// current basic block (the last block on the bbhead list).
//   inst           - the instruction just emitted.
//   pre_offset     - saved in _gc_unsafe_offsets when the instruction is
//                    GC-unsafe.
//   emitter_offset - code offset used to compute the record's inst_length,
//                    measured from the block's current_emitter_offset.
//   frame          - forwarded to add_inst_gc().
// A scratch record is always prepared at index gc_esp_rec_size, but it is
// only kept (gc_esp_rec_size advanced) if the instruction changed the esp
// adjustment or the set of live references.
void GC_Map::add_inst(Inst *inst, unsigned pre_offset, unsigned emitter_offset, Frame &frame)
{
    if (inst->is_gc_unsafe())
    {
        RESIZE_ARRAY(unsigned, _gc_unsafe_offsets, _gc_unsafe_capacity, _gc_unsafe_size, 4, mem);
        _gc_unsafe_offsets[_gc_unsafe_size++] = pre_offset;
    }
    GC_Map_BB *bbmap = bbhead.prev();  // Current node is at the end of the list.
    RESIZE_ARRAY(GC_Map_BB::unified_gc_esp_rec, bbmap->gc_esp_records,
        bbmap->gc_esp_rec_capacity, bbmap->gc_esp_rec_size, 4, mem);
    // Prepare the scratch record: its length is the distance from the last
    // recorded offset, and it starts from the current register liveness.
    bbmap->gc_esp_records[bbmap->gc_esp_rec_size].init(emitter_offset - bbmap->current_emitter_offset);
    bbmap->gc_esp_records[bbmap->gc_esp_rec_size].live_ref_regs = bbmap->gc_current_live_ref_regs;
    // Process the ESP adjustment information of the instruction.
    // Instructions that modify esp:
    //   push, pop
    //   call (but multianewarray is caller-pop)
    //   add/sub esp, ...
    int esp_adjustment = inst->esp_effect();
    int new_esp_adjustment = bbmap->esp_last_adjustment;
    if (inst->is_call() && esp_adjustment == 1)
        // consider all arguments to be popped now
        new_esp_adjustment = 0;
    else
        new_esp_adjustment -= esp_adjustment;
    bbmap->gc_esp_records[bbmap->gc_esp_rec_size].esp_adjustment = new_esp_adjustment;
    bool created_new_record = false;
    if (new_esp_adjustment != bbmap->esp_last_adjustment)
    {
        created_new_record = true;
        bbmap->esp_last_adjustment = new_esp_adjustment;
        int i = bbmap->gc_esp_rec_size;
        if (largest_inst_length < bbmap->gc_esp_records[i].inst_length)
            largest_inst_length = bbmap->gc_esp_records[i].inst_length;
        bbmap->current_emitter_offset = emitter_offset;
#ifdef DEBUGGING_OUTPUT
        // Trace the record that was just filled in.  It lives in the unified
        // gc_esp_records array, the same one written above.  The casts make
        // the fields print as numbers whatever their declared width.
        cout << "  esp adjustment record, inst_length="
             << (unsigned) bbmap->gc_esp_records[i].inst_length
             << ", adjustment="
             << (int) bbmap->gc_esp_records[i].esp_adjustment
             << endl;
#endif // DEBUGGING_OUTPUT
    }

    // Process the GC information of the instruction.
    // This comes after processing ESP information, so that GC information knows
    // the esp position for a push instruction.
    bbmap->add_inst_gc(inst, emitter_offset, mem, frame, this, created_new_record);

    // Process the JSR information of the instruction.

    // Keep the scratch record only if either pass recorded a change.
    if (created_new_record)
        bbmap->gc_esp_rec_size ++;
}

// Closes the map of the current basic block.  A block that emitted no code
// (its start offset equals the offset at which it ends) is dropped from the
// list; any other block is counted in bbcount and checked for leftover
// "unknown" liveness, which is reported as a JSR problem when the block has
// no enclosing subroutine.
void GC_Map::end_bb(Cfg_Node *node, unsigned emitter_offset)
{
    GC_Map_BB *finished = bbhead.prev();
    if (finished->initial_emitter_offset == emitter_offset)
    {
        // Zero-length block: discard its map.
        finished->unlink();
#ifdef DEBUGGING_OUTPUT
        cout << "Removing 0-length BB#" << node->label << endl;
#endif // DEBUGGING_OUTPUT
        return;
    }

    bbcount ++;
    if (node->get_enclosing_subr() == NULL &&
        !((Bit_Vector_Group_GC *)node->live)->no_unknowns())
    {
        print_jsr_problem_info(node);
    }
#ifdef PLDI
    if (!((Bit_Vector_Group_GC *)node->live)->no_unknowns())
        pldi_jsr_problems ++;
#endif // PLDI
}

#ifdef USE_DISASSEMBLER_FOR_INST_LENGTH
// Asks the disassembler for the length of the instruction that starts
// cur_offset bytes into code_block.  Asserts that the length is non-zero and
// does not exceed max_len.
static unsigned i_len(char *code_block, unsigned cur_offset, unsigned max_len)
{
    char *inst_start = code_block + cur_offset;
    const unsigned n_bytes = x86_inst_length(inst_start);
    assert(n_bytes > 0 && n_bytes <= max_len);
    return n_bytes;
}
#endif // USE_DISASSEMBLER_FOR_INST_LENGTH

// Appends the encoding of an instruction-record length to `bits`.
//   length                 - the value to encode.
//   width                  - field width used for the explicit long form.
//   code_block, cur_offset - locate the instruction; only consulted when
//                            USE_DISASSEMBLER_FOR_INST_LENGTH is defined.
//   tag                    - statistics tag passed with every emitted field.
// Disassembler build: a single 1 bit if the disassembler reproduces `length`,
// otherwise a 0 bit followed by `length` in `width` bits.
// Default build: lengths 1..7 are stored as length-1 in 3 bits; anything else
// is stored as the escape value 7 followed by `length` in `width` bits.
static void write_inst_length(unsigned length, BitStream &bits, unsigned width,
                              char *code_block, unsigned cur_offset,
                              BitStream::StatType tag)
{
    //bits.emit_unsigned(0xdeadbeef, 32, tag);
#ifdef USE_DISASSEMBLER_FOR_INST_LENGTH
    const unsigned decoded = i_len(code_block, cur_offset, length);
    if (decoded == length)
    {
        // The reader can recompute the length; no histogram update here.
        bits.emit_unsigned(1, 1, tag);
        return;
    }
    bits.emit_unsigned(0, 1, tag);
    bits.emit_unsigned(length, width, tag);
#else // USE_DISASSEMBLER_FOR_INST_LENGTH
    const bool needs_escape = (length < 1 || length > 7);
    if (needs_escape)
    {
        bits.emit_unsigned(7, 3, tag);
        bits.emit_unsigned(length, width, tag);
    }
    else
        bits.emit_unsigned(length-1, 3, tag);
#endif // USE_DISASSEMBLER_FOR_INST_LENGTH

#ifdef PLDI
    // Out-of-range lengths are counted in histogram bucket 0.
    pldi_inst_hist[length >= MAX_INST_HIST ? 0 : length] ++;
#endif // PLDI
}

// Decodes a length written by write_inst_length() and returns it.
//   width                  - field width of the explicit long form.
//   code_block, cur_offset - locate the instruction; only consulted when
//                            USE_DISASSEMBLER_FOR_INST_LENGTH is defined.
static unsigned read_inst_length(BitStream &bits, unsigned width,
                                 char *code_block, unsigned cur_offset)
{
    //unsigned blah = bits.read_unsigned(32);
    //assert(blah == 0xdeadbeef);
#ifdef USE_DISASSEMBLER_FOR_INST_LENGTH
    const bool recompute = (bits.read_unsigned(1) == 1);
    if (recompute)
    {
        // -1 converts to the largest unsigned value, i.e. no upper bound.
        return i_len(code_block, cur_offset, -1);
    }
    return bits.read_unsigned(width);
#else // USE_DISASSEMBLER_FOR_INST_LENGTH
    // The 3-bit field holds length-1 for lengths 1..7; a decoded value of 8
    // is the escape meaning the real length follows in `width` bits.
    unsigned decoded = bits.read_unsigned(3) + 1;
    if (decoded != 8)
        return decoded;
    decoded = bits.read_unsigned(width);
    return decoded;
#endif // USE_DISASSEMBLER_FOR_INST_LENGTH
}

// 1 = esp                  (55%)
// 0, 1 = reg, esp          (18%)
// 0, 0, 1 = reg            (15%)
// 0, 0, 0, 1 = reg, stk    ( 6%)
// 0, 0, 0, 0, 1 = stk, esp ( 3%)
// 0, 0, 0, 0, 0, 1 = stk   ( 3%)
// 0, 0, 0, 0, 0, 0 = !esp & !reg & !stk
// Emits the variable-length prefix code that says which of the register,
// stack and esp parts of a record changed: a run of 0 bits closed by a 1 bit,
// or six 0 bits when nothing changed.  The combination "all three changed"
// has no code of its own; it trips the assert and, with asserts compiled out,
// is written as six 0 bits.
static void write_reg_stk_esp_change(bool reg_change, bool stk_change, bool esp_change,
                                     BitStream &bits)
{
    // Number of 0 bits that precede the closing 1 bit, indexed by
    // (esp << 2) | (stk << 1) | reg.  The value 6 stands for "six zeros and
    // no closing 1".
    static const unsigned zeros_before_one[8] = {
        6,  // nothing changed
        2,  // reg
        5,  // stk
        3,  // reg, stk
        0,  // esp
        1,  // reg, esp
        4,  // stk, esp
        6   // reg, stk, esp: not representable
    };
    const unsigned combo = (esp_change ? 4u : 0u) |
                           (stk_change ? 2u : 0u) |
                           (reg_change ? 1u : 0u);
    const unsigned zeros = zeros_before_one[combo];
    const bool has_terminator = (zeros < 6);

    // At most five zeros are written before the final decision.
    const unsigned leading = (has_terminator ? zeros : 5);
    for (unsigned k = 0; k < leading; k++)
        bits.emit_unsigned(0, 1, BitStream::stat_rec_reg_stk_esp_change);

    if (has_terminator)
    {
        bits.emit_unsigned(1, 1, BitStream::stat_rec_reg_stk_esp_change);
        return;
    }
    assert(!esp_change && !stk_change && !reg_change);
    bits.emit_unsigned(0, 1, BitStream::stat_rec_reg_stk_esp_change);
}

// 1 = esp                  (55%)
// 0, 1 = reg, esp          (18%)
// 0, 0, 1 = reg            (15%)
// 0, 0, 0, 1 = reg, stk    ( 6%)
// 0, 0, 0, 0, 1 = stk, esp ( 3%)
// 0, 0, 0, 0, 0, 1 = stk   ( 3%)
// 0, 0, 0, 0, 0, 0 = !esp & !reg & !stk
// Decodes the prefix code produced by write_reg_stk_esp_change() and stores
// the result in the three out-parameters.  Reads bits until a 1 is seen or
// six bits have been consumed, whichever comes first.
static void read_reg_stk_esp_change(bool &reg_change, bool &stk_change, bool &esp_change,
                                    BitStream &bits)
{
    // Count the 0 bits in front of the closing 1 (6 = no closing 1 found).
    unsigned zeros = 0;
    while (zeros < 6 && bits.read_unsigned(1) != 1)
        zeros++;

    switch (zeros)
    {
    case 0:  // esp
        reg_change = false;  stk_change = false;  esp_change = true;
        break;
    case 1:  // reg, esp
        reg_change = true;   stk_change = false;  esp_change = true;
        break;
    case 2:  // reg
        reg_change = true;   stk_change = false;  esp_change = false;
        break;
    case 3:  // reg, stk
        reg_change = true;   stk_change = true;   esp_change = false;
        break;
    case 4:  // stk, esp
        reg_change = false;  stk_change = true;   esp_change = true;
        break;
    case 5:  // stk
        reg_change = false;  stk_change = true;   esp_change = false;
        break;
    default: // six zeros: nothing changed
        reg_change = false;  stk_change = false;  esp_change = false;
        break;
    }
}

extern bool O3_statistics; 
void GC_Map::emit_all(Flow_Graph *fg, Frame &frame, Method_Handle mh, Expressions &exprs,
                      BitStream &bits, unsigned &mi_offset_bits, unsigned code_size, bool first)
{
    if (!first)
        bits.start_writing(exprs.mem, bits.get_offset());
    GC_Map_BB *rec;

    //
    // emit profiling info
    //
    if (O3_statistics) {
        bits.emit_unsigned(1, 1, BitStream::stat_prof_bits);
        bits.emit_unsigned((unsigned)fg->o3_prof_rec, sizeof(unsigned)*8, BitStream::stat_prof_bits);
    } else
        bits.emit_unsigned(0, 1, BitStream::stat_prof_bits);
#ifdef PLDI_OVERRIDDEN
    //
    // emit virtual method overridden info
    //
    if (fg->overridden_rec != NULL) {
        bits.emit_unsigned(1, 1, BitStream::stat_prof_bits);
        bits.emit_unsigned((unsigned)fg->overridden_rec, sizeof(unsigned)*8, BitStream::stat_overridden_bits);
    } else
        bits.emit_unsigned(0, 1, BitStream::stat_overridden_bits);
#endif

    unsigned code_size_bits = BitStream::bits_required_for_unsigned(code_size);
    unsigned code_size_bits_bits = BitStream::bits_required_for_unsigned(code_size_bits);
    unsigned inst_len_bits = BitStream::bits_required_for_unsigned(largest_inst_length);
#ifdef _DEBUG
    unsigned rmask = fg->callee_saved_registers_used();
    unsigned ncallee = frame.n_callee;
    if (ncallee > 0) assert(rmask & (1u << ebx_reg));
    if (ncallee > 1) assert(rmask & (1u << ebp_reg));
    if (ncallee > 2) assert(rmask & (1u << esi_reg));
    if (ncallee > 3) assert(rmask & (1u << edi_reg));
#endif // _DEBUG
    // number of basic blocks
    unsigned bbcount_bits = BitStream::bits_required_for_unsigned(bbcount);
    bits.emit_unsigned(bbcount_bits, code_size_bits_bits, BitStream::stat_bbcount_bits);
    bits.emit_unsigned(bbcount, bbcount_bits, BitStream::stat_bbcount);
#ifdef PLDI
    if (!first) pldi_bb_snapshots += bbcount;
#endif // PLDI
    bits.emit_unsigned(inst_len_bits, code_size_bits_bits, BitStream::stat_max_inst_len_bits);
    bits.emit_unsigned(frame.n_spill, code_size_bits, BitStream::stat_n_spill_words);
    if (frame.n_callee == 0)
        bits.emit_unsigned(0, 1, BitStream::stat_n_callee_zero);
    else
    {
        bits.emit_unsigned(1, 1, BitStream::stat_n_callee_zero);
        bits.emit_unsigned(frame.n_callee - 1, 2, BitStream::stat_n_callee);
    }
    if (frame.n_callee > 0)
    {
        if (_first_pop_offset == -1)
            _first_pop_offset = code_size - 1;
        if (_return_offset == -1)
            _return_offset = code_size - 1;
        bits.emit_unsigned(_first_push_offset, 3, BitStream::stat_push_pop_ret_offsets);
        bits.emit_unsigned(_first_pop_offset, code_size_bits, BitStream::stat_push_pop_ret_offsets);
        bits.emit_unsigned(_return_offset, code_size_bits, BitStream::stat_push_pop_ret_offsets);
    }
    if (method_is_synchronized(mh) && !method_is_static(mh))
    {
        unsigned tmp_this_offset = -1;
        unsigned tmp_this_in_reg = 1;
        Reg_Operand *thisp = (Reg_Operand *)fg->this_pointer_of_method;
        if (thisp->assigned_preg() == n_reg)
        {
            tmp_this_in_reg = 0;
            unsigned home_location = thisp->home_location();
            assert(home_location != -1);
            if (thisp->use_arg_home_loc())
                tmp_this_offset = frame.var_offset(home_location);
            else
                tmp_this_offset = frame.spill_offset(frame.n_spill - home_location - 1);
        }
        else
        {
            tmp_this_in_reg = 1;
            tmp_this_offset = thisp->assigned_preg();
        }
        bits.emit_unsigned(tmp_this_in_reg, 1, BitStream::stat_sync);
        bits.emit_signed(tmp_this_offset, 32, BitStream::stat_sync);
    }

    // array of GC unsafe points
    if (_gc_unsafe_size == 0)
        bits.emit_unsigned(0, 1, BitStream::stat_gc_safe);
    else
    {
        bits.emit_unsigned(1, 1, BitStream::stat_gc_safe);
        bits.emit_unsigned(_gc_unsafe_size, code_size_bits, BitStream::stat_gc_safe);
        int ii;
        for (ii=0; ii<_gc_unsafe_size; ii++)
            bits.emit_unsigned(_gc_unsafe_offsets[ii], code_size_bits, BitStream::stat_gc_safe);
#ifdef PLDI
        if (!first)
        {
            pldi_methods_with_wb ++;
            pldi_gc_unsafe_inst += _gc_unsafe_size;
        }
#endif // PLDI
    }

    // array of basic block start EIPs
    for (rec = bbhead.next()->next(); rec != &bbhead; rec = rec->next())
        bits.emit_unsigned(rec->initial_emitter_offset, code_size_bits, BitStream::stat_eip_array);

#ifdef USE_BB_INDEX
    unsigned cur_mi_offset_offset = bits.get_offset();
    // array of method_info offsets for each basic block (except the first)
    bits.set_offset(cur_mi_offset_offset + mi_offset_bits * (bbcount - 1));
#endif // USE_BB_INDEX

    bool first_gc_bb = true;
    // Walk through the unified BB records.
    for (rec = bbhead.next(); rec != &bbhead; first_gc_bb = false, rec = rec->next())
    {
#ifdef USE_BB_INDEX
        if (!first_gc_bb)
        {
            bits.emit_unsigned_at_offset(bits.get_offset(), mi_offset_bits,
                cur_mi_offset_offset, BitStream::stat_mi_offset_array);
            cur_mi_offset_offset += mi_offset_bits;
        }
#endif // USE_BB_INDEX
        unsigned cur_emitter_offset = rec->initial_emitter_offset;
        bits.emit_unsigned(rec->gc_esp_rec_size, code_size_bits, BitStream::stat_num_inst_recs);
        // esp adjustment upon BB entry
        int min_esp = 0;
        int max_esp = 0;
        if (rec->esp_adjustment_bytes < 0)
            min_esp = rec->esp_adjustment_bytes;
        else
            max_esp = rec->esp_adjustment_bytes;
        int last_val = rec->esp_adjustment_bytes;
        int i;
        for (i=0; i<rec->gc_esp_rec_size; i++)
        {
            int val = rec->gc_esp_records[i].esp_adjustment;
            if (val != 0 && (val - last_val != 4) && (last_val - val != 4))
            {
                if (val > max_esp) max_esp = val;
                if (val < min_esp) min_esp = val;
            }
        }
        unsigned t1 = BitStream::bits_required_for_signed(max_esp >> 2);
        unsigned t2 = BitStream::bits_required_for_signed(min_esp >> 2);
        unsigned esp_adj_bits = (t1 > t2 ? t1 : t2);
        if (rec->gc_esp_rec_size == 0 && rec->esp_adjustment_bytes == 0)
            bits.emit_unsigned(0, 1, BitStream::stat_adj_nonzero);
        else
        {
            bits.emit_unsigned(1, 1, BitStream::stat_adj_nonzero);
            bits.emit_unsigned(esp_adj_bits, 5, BitStream::stat_adj_bits);
            bits.emit_signed(rec->esp_adjustment_bytes >> 2, esp_adj_bits, BitStream::stat_initial_adj);
        }
        unsigned bv_size = rec->gc_live_ref_esp_offsets_size;
        unsigned bv_size_bits = BitStream::bits_required_for_unsigned(bv_size);
        // live ref register bitmap upon BB entry
        // set esp bit to indicate that bv_size == 0
        unsigned reg_state = rec->gc_initial_live_ref_regs;
        if (first_gc_bb)
            assert(reg_state == 0);  // XXX- fix for fastcall
        unsigned prev_reg_state = reg_state;
        if (first_gc_bb)
        {
            if (bv_size == 0)
                bits.emit_unsigned(1, 1, BitStream::stat_init_reg_state);
            else
                bits.emit_unsigned(0, 1, BitStream::stat_init_reg_state);
        }
        else
        {
            if (bv_size == 0)
                reg_state |= (1u << esp_reg);
            bits.emit_unsigned(reg_state, 8, BitStream::stat_init_reg_state);
        }
        if (bv_size != 0)
        {
            bits.emit_unsigned(bv_size_bits, 5, BitStream::stat_bv_size_parms);
            // # live refs on stack upon BB entry
            bits.emit_unsigned(rec->gc_num_initial_live_ref_stack, bv_size_bits, BitStream::stat_bv_size_parms);
            // total # of stack locations that will contain live refs
            bits.emit_unsigned(bv_size, bv_size_bits, BitStream::stat_bv_size_parms);
        }
        // array of mappings from BV position to stack offset
        int max_esp_offset = 0;
        int min_esp_offset = 0;
        for (i=0; i<rec->gc_live_ref_esp_offsets_size; i++)
        {
            int val = rec->gc_live_ref_esp_offsets[i];
            if (max_esp_offset < val) max_esp_offset = val;
            if (min_esp_offset > val) min_esp_offset = val;
        }
        t1 = BitStream::bits_required_for_unsigned(max_esp_offset >> 2);
        t2 = BitStream::bits_required_for_unsigned(min_esp_offset >> 2);
        unsigned esp_offset_bits = (t1 > t2 ? t1 : t2);
        if (bv_size > 0)
            bits.emit_unsigned(esp_offset_bits, 5, BitStream::stat_esp_offset_parms);
        //if (first) cout << "+++ ";
        for (i=0; i<rec->gc_live_ref_esp_offsets_size; i++)
        {
            assert(rec->gc_live_ref_esp_offsets[i] % 4 == 0);
            bits.emit_unsigned(rec->gc_live_ref_esp_offsets[i] >> 2, esp_offset_bits, BitStream::stat_esp_offset_parms);
            //if (first) cout << rec->gc_live_ref_esp_offsets[i] << " ";
        }
        //if (first) cout << "+++" << endl;
        last_val = rec->esp_adjustment_bytes;
        // the individual instruction records:
        for (i=0; i<rec->gc_esp_rec_size; i++)
        {
            // length in bytes
            write_inst_length(rec->gc_esp_records[i].inst_length, bits, inst_len_bits,
                fg->code_block, cur_emitter_offset,
                BitStream::stat_rec_inst_len);
#ifdef PLDI
            if (!first)
            {
                unsigned true_length = x86_inst_length(fg->code_block + cur_emitter_offset);
                if (true_length == rec->gc_esp_records[i].inst_length)
                    pldi_num_single_inst ++;
                else
                    pldi_num_multiple_inst ++;
            }
#endif // PLDI
            cur_emitter_offset += rec->gc_esp_records[i].inst_length;
            //if (first) cout << rec->gc_esp_records[i].inst_length << endl;
            // length in bytes
            // register status -- set esp bit if necessary
            unsigned reg_status = rec->gc_esp_records[i].live_ref_regs;
            bool reg_change = (reg_status != prev_reg_state);
            bool stk_change =
                (rec->gc_esp_records[i].live_ref_change != (char)GC_Map_BB::none);
            int val = rec->gc_esp_records[i].esp_adjustment;
            bool esp_change = (val != last_val);
            write_reg_stk_esp_change(reg_change, stk_change, esp_change, bits);
#ifdef PLDI
            if (!first) pldi_inst_effect[reg_change][stk_change][esp_change]++;
#endif // PLDI
            if (reg_change)
            {
                unsigned change = (reg_status ^ prev_reg_state);
                assert(change);
                if (change & (change - 1))  // multiple-bit
                {
                    bits.emit_unsigned(0, 1, BitStream::stat_rec_reg_parms); // not eax
                    bits.emit_unsigned(esp_reg, 3, BitStream::stat_rec_reg_parms);
                    bits.emit_unsigned(reg_status, 8, BitStream::stat_rec_reg_parms);
#ifdef PLDI
                    if (!first) pldi_multiple_reg_change++;
#endif // PLDI
                }
                else
                {
                    unsigned which_reg;
                    for (which_reg=0; which_reg<n_reg; which_reg++)
                    {
                        if ((1u << which_reg) == change)
                            break;
                    }
                    assert(which_reg < n_reg);
#ifdef PLDI
                    if (!first) pldi_single_reg_change[which_reg]++;
#endif // PLDI
                    if (which_reg == eax_reg)
                        bits.emit_unsigned(1, 1, BitStream::stat_rec_reg_parms); // is eax
                    else
                    {
                        bits.emit_unsigned(0, 1, BitStream::stat_rec_reg_parms); // not eax
                        bits.emit_unsigned(which_reg, 3, BitStream::stat_rec_reg_parms);
                    }
                }
            }
            prev_reg_state = reg_status;

            if (stk_change)
            {
                bits.emit_unsigned(rec->gc_esp_records[i].stack_change_index,
                    bv_size_bits, BitStream::stat_rec_stk_change);
            }

            // esp adjustment at end of instruction
            if (esp_change)
            {
                // 1 = push ref          (35%)
                // 0, 1 = push nonref    (31%)
                // 0, 0, 1 = call        (28%)
                // 0, 0, 0, 1 = pop      ( 5%)
                // 0, 0, 0, 0 = other    ( 1%)
                assert(val % 4 == 0);
                if (val - last_val == 4)
                {
                    if (rec->gc_esp_records[i].top_of_stack_live)
                    {
                        bits.emit_unsigned(1, 1, BitStream::stat_rec_esp_change_op);
#ifdef PLDI
                        if (!first) pldi_stk_push_ref++;
#endif // PLDI
                    }
                    else
                    {
                        bits.emit_unsigned(0, 1, BitStream::stat_rec_esp_change_op);
                        bits.emit_unsigned(1, 1, BitStream::stat_rec_esp_change_op);
#ifdef PLDI
                        if (!first) pldi_stk_push_nonref++;
#endif // PLDI
                    }
                }
                else
                {
                    if (val == 0)
                    {
                        bits.emit_unsigned(0, 1, BitStream::stat_rec_esp_change_op);
                        bits.emit_unsigned(0, 1, BitStream::stat_rec_esp_change_op);
                        bits.emit_unsigned(1, 1, BitStream::stat_rec_esp_change_op);
#ifdef PLDI
                        if (!first) pldi_stk_call++;
#endif // PLDI
                    }
                    else if (last_val - val == 4)
                    {
                        bits.emit_unsigned(0, 1, BitStream::stat_rec_esp_change_op);
                        bits.emit_unsigned(0, 1, BitStream::stat_rec_esp_change_op);
                        bits.emit_unsigned(0, 1, BitStream::stat_rec_esp_change_op);
                        bits.emit_unsigned(1, 1, BitStream::stat_rec_esp_change_op);
#ifdef PLDI
                        if (!first) pldi_stk_pop++;
#endif // PLDI
                    }
                    else
                    {
                        bits.emit_unsigned(0, 1, BitStream::stat_rec_esp_change_op);
                        bits.emit_unsigned(0, 1, BitStream::stat_rec_esp_change_op);
                        bits.emit_unsigned(0, 1, BitStream::stat_rec_esp_change_op);
                        bits.emit_unsigned(0, 1, BitStream::stat_rec_esp_change_op);
                        bits.emit_signed(rec->gc_esp_records[i].esp_adjustment >> 2, esp_adj_bits,
                            BitStream::stat_rec_esp_change_adj);
#ifdef PLDI
                        if (!first) pldi_stk_other++;
#endif // PLDI
                    }
                }
            }
            last_val = val;
        }
    }
#ifdef USE_BB_INDEX
    // Figure out the optimal value of mi_offset_bits.
    // This is the smallest N such that 2^N >= mi_size.
    // If bbcount==1, then no space was allocated for mi_offsets in the first place.
    if (first && bbcount > 1)
    {
        unsigned final_size = bits.get_offset();
        do
        {
            final_size -= (bbcount - 1);
            mi_offset_bits --;
        }
        while ((1u << mi_offset_bits) >= final_size);
        mi_offset_bits ++;
        final_size += (bbcount - 1);
        bits.set_offset(final_size);
    }
#endif // USE_BB_INDEX
}

extern unsigned n_words_of_method_arg_type(Method_Handle handle);
//
// KEN
// This routine decodes only the first two fields of the method_info stream,
// prof_rec and overridden_rec, so callers that need nothing else do not pay
// for decoding the rest of the header.
//
// bits - method_info bit stream; rewound to offset 0 before reading and left
//        positioned just past the fields read here.
// fwi  - receives prof_rec (and overridden_rec when PLDI_OVERRIDDEN is
//        defined).  A field is set to NULL when its presence bit is 0.
//
void init_prof_and_overridden(BitStream& bits,
                             GC_Map::fixed_width_info& fwi)
{
    // Number of bits used to store a record pointer in the stream.
    const unsigned ptr_bits = sizeof(unsigned) * 8;

    bits.set_offset(0);

    // o3_profiling_rec: a presence bit, followed by the pointer if present.
    const bool prof_present = (bits.read_unsigned(1) == 1);
    if (!prof_present)
        fwi.prof_rec = NULL;
    else
        fwi.prof_rec = (void*)bits.read_unsigned(ptr_bits);

#ifdef PLDI_OVERRIDDEN
    // Virtual method overridden rec: same layout as the profiling rec.
    const bool overridden_present = (bits.read_unsigned(1) == 1);
    if (!overridden_present)
        fwi.overridden_rec = NULL;
    else
        fwi.overridden_rec = (void*)bits.read_unsigned(ptr_bits);
#endif
}

// Decodes the fixed-width header of a method's method_info bit stream into fwi
// and, when set_values is true, locates the basic block that contains eip.
//
// bits         - bit stream over the method's info block; repositioned by this call.
// eip          - address to look up; only used when set_values is true.
// method       - method whose code block and info block sizes are queried.
// fwi          - receives the decoded header fields.
// set_values   - when false, the function returns right after the header and
//                leaves this_rec_ptr and bb_start_eip untouched.
// this_rec_ptr - out: bit offset of the record of the basic block that was found.
// bb_start_eip - out: start address of that basic block.
void init_pointers(BitStream &bits, 
                   unsigned eip, 
                   Method_Handle method,
                   GC_Map::fixed_width_info &fwi,
                   bool set_values,
                   unsigned &this_rec_ptr,
                   unsigned &bb_start_eip)
{
    // Rewinds the stream and reads the leading prof_rec / overridden_rec fields.
    init_prof_and_overridden(bits,fwi);

    // Field widths are not stored in the stream; they are derived from the
    // sizes of the method's code block and info block.
    fwi.code_block = (unsigned) method_get_code_block_addr(method, O3_Jit_Handle);
    fwi.code_size_bits = BitStream::bits_required_for_unsigned(method_get_code_block_size(method, O3_Jit_Handle));
    unsigned code_size_bits_bits = BitStream::bits_required_for_unsigned(fwi.code_size_bits);
    fwi.mi_size_bits = BitStream::bits_required_for_unsigned(8 * method_get_info_block_size(method, O3_Jit_Handle) - 1);
    // The basic block count is stored with an explicit width prefix.
    unsigned num_bb_bits = bits.read_unsigned(code_size_bits_bits);
    fwi.num_bb = bits.read_unsigned(num_bb_bits);
    fwi.inst_len_bits = bits.read_unsigned(code_size_bits_bits);
    //fwi.num_incoming = bits.read_unsigned(8);
    // XXX- this doesn't yet work right for args-in-registers
    // The incoming argument count comes from the method signature, plus one
    // for non-static methods.
    fwi.num_incoming = n_words_of_method_arg_type(method);
    if (!method_is_static(method)) fwi.num_incoming ++;
    fwi.num_spill = bits.read_unsigned(fwi.code_size_bits);
    // Callee-saved register count: a 0 bit means none, otherwise two bits hold (count - 1).
    if (bits.read_unsigned(1) == 0)
        fwi.num_callee = 0;
    else
        fwi.num_callee = bits.read_unsigned(2) + 1;
    if (fwi.num_callee == 0)
    {
        // Nothing is stored in the stream in this case; the three markers are
        // set to the address just before the code block.
        fwi.first_push_eip = fwi.code_block - 1;
        fwi.first_pop_eip = fwi.code_block - 1;
        fwi.return_eip = fwi.code_block - 1;
    }
    else
    {
        // Stored as offsets from the start of the code block; the first push
        // offset uses only 3 bits.
        fwi.first_push_eip = fwi.code_block + bits.read_unsigned(3);
        fwi.first_pop_eip = fwi.code_block + bits.read_unsigned(fwi.code_size_bits);
        fwi.return_eip = fwi.code_block + bits.read_unsigned(fwi.code_size_bits);
    }
    // The location of "this" is only read for synchronized instance methods.
    if (method_is_synchronized(method) && !method_is_static(method))
    {
        fwi.this_in_reg = bits.read_unsigned(1);
        fwi.this_offset = bits.read_signed(32);
    }
    else
    {
        // In non-_DEBUG builds these two fields are left unassigned here.
#ifdef _DEBUG
        fwi.this_in_reg = 0;
        fwi.this_offset = -1;
#endif // _DEBUG
    }

    // GC unsafe points: a presence bit, then the count.  The array itself is
    // not decoded; its bit offset is remembered and the stream skips past it.
    if (bits.read_unsigned(1) == 0)
        fwi.num_gc_unsafe = 0;
    else
        fwi.num_gc_unsafe = bits.read_unsigned(fwi.code_size_bits);
    fwi.gc_unsafe_offset = bits.get_offset();
    bits.set_offset(bits.get_offset() + fwi.num_gc_unsafe * fwi.code_size_bits);

    // Callers that only need the header stop here.
    if (!set_values)
        return;

    unsigned num_bb = fwi.num_bb;
    unsigned bb_eip_array_offset = bits.get_offset();

    // Find basic block number.
    // The array has num_bb-1 entries, one for each block after the first
    // (block 0 starts at code_block).  Scan until an entry starts beyond eip.
    // NOTE(review): assumes the entries are in increasing order -- confirm
    // against the emitter.
    unsigned i;
    unsigned which_bb = 0;
    for (i=1; i<num_bb; i++)
    {
        unsigned new_eip = bits.read_unsigned(fwi.code_size_bits) + fwi.code_block;
        if (new_eip > eip)
            break;
        which_bb = i;
    }
    assert(which_bb < num_bb);

    // Entry (which_bb - 1) of the array holds the start offset of block which_bb.
    if (which_bb == 0)
        bb_start_eip = fwi.code_block;
    else
        bb_start_eip = bits.read_unsigned_at_offset(fwi.code_size_bits,
        bb_eip_array_offset + fwi.code_size_bits * (which_bb - 1)) + fwi.code_block;
    // Position the stream just past the EIP array regardless of where the scan stopped.
    bits.set_offset(bb_eip_array_offset + fwi.code_size_bits * (num_bb - 1));

#ifdef USE_BB_INDEX
    // A second array of num_bb-1 entries, each mi_size_bits wide, follows the
    // EIP array and gives the bit offset of each later block's record.  The
    // record of block 0 starts right after that array.
    unsigned mi_array_offset = bb_eip_array_offset + fwi.code_size_bits * (num_bb - 1);
    unsigned first_record_offset = mi_array_offset + fwi.mi_size_bits * (num_bb - 1);
    if (which_bb == 0)
        this_rec_ptr = first_record_offset;
    else
        this_rec_ptr = bits.read_unsigned_at_offset(fwi.mi_size_bits,
        mi_array_offset + fwi.mi_size_bits * (which_bb - 1));

#else // USE_BB_INDEX

    // Without the index, every record before which_bb has to be decoded just
    // to find out where the next one begins.
    // Walk through all the unified records.
    for (i=0; i<num_bb; i++)
    {
        if (i == which_bb)
        {
            this_rec_ptr = bits.get_offset();
            break;
        }
        // Code offset at which block i starts (0 for the first block).
        unsigned cur_offset = 0;
        if (i > 0)
            cur_offset = bits.read_unsigned_at_offset(fwi.code_size_bits,
            bb_eip_array_offset + fwi.code_size_bits * (i - 1));
        // Number of instruction records in this block.
        unsigned numrec = bits.read_unsigned(fwi.code_size_bits);
        unsigned esp_adj_bits;
        int esp_adj_bb_entry;
        if (bits.read_unsigned(1) == 0)
        {
            assert(numrec == 0);
            esp_adj_bits = 0;
            esp_adj_bb_entry = 0;
        }
        else
        {
            esp_adj_bits = bits.read_unsigned(5);
            esp_adj_bb_entry = bits.read_signed(esp_adj_bits) << 2;
        }
        // The first block stores a single bit (the "no stack bit vector" flag,
        // kept in the esp position); later blocks store an 8-bit register state.
        unsigned char reg_state;
        if (i == 0)
        {
            reg_state = 0;
            if (bits.read_unsigned(1) == 1)
                reg_state |= (1u << esp_reg);
        }
        else
        {
            reg_state = bits.read_unsigned(8);
        }
        // The esp bit set means bv_size == 0, so no size fields follow.
        unsigned bv_size_bits, num_initial_live, bv_size;
        if (reg_state & (1u << esp_reg))
        {
            reg_state &= ~(1u << esp_reg);
            bv_size_bits = 0;
            num_initial_live = 0;
            bv_size = 0;
        }
        else
        {
            bv_size_bits = bits.read_unsigned(5);
            num_initial_live = bits.read_unsigned(bv_size_bits);
            bv_size = bits.read_unsigned(bv_size_bits);
        }
        // Skip the array that maps bit-vector positions to stack offsets.
        unsigned esp_offset_bits = 0;
        if (bv_size > 0)
            esp_offset_bits = bits.read_unsigned(5);
        unsigned esp_offset_array_offset = bits.get_offset();
        bits.set_offset(bits.get_offset() + esp_offset_bits * bv_size);

        // NOTE(review): this declaration is shadowed by the loop variable
        // declared on the next line and is not otherwise used.
        unsigned rec;
        for (unsigned rec=0; rec<numrec; rec++)
        {
            // Each record is read only to advance the stream; values are discarded.
            unsigned len = read_inst_length(bits, fwi.inst_len_bits, (char *)fwi.code_block, cur_offset);
            cur_offset += len;
            bool reg_change, stk_change, esp_change;
            read_reg_stk_esp_change(reg_change, stk_change, esp_change, bits);
            if (reg_change) // reg_change
            {
                if (bits.read_unsigned(1) == 0) // not eax
                {
                    // A register number equal to esp_reg is followed by a full
                    // 8-bit register state.
                    if (bits.read_unsigned(3) == esp_reg)
                        bits.read_unsigned(8);
                }
            }
            if (stk_change) // stk_change
            {
                bits.read_unsigned(bv_size_bits);
            }
            if (esp_change) // esp change
            {
                // Prefix code: up to four 0 bits; only the all-zero code is
                // followed by an explicit adjustment value.
                if (bits.read_unsigned(1) == 0)
                {
                    if (bits.read_unsigned(1) == 0)
                    {
                        if (bits.read_unsigned(1) == 0)
                        {
                            if (bits.read_unsigned(1) == 0)
                            {
                                bits.read_unsigned(esp_adj_bits);
                            }
                        }
                    }
                }
            }
        }
    }
    assert(i < num_bb && i == which_bb);
#endif // USE_BB_INDEX
}

// Lookup cache used by normalize_with_cache() / normalize_add_to_cache().
// An eip maps to the single slot (eip % NCACHE_SIZE); storing a new entry
// overwrites whatever was in that slot.
#define NCACHE_SIZE 1024
static struct {
    uint32 eip;         // eip the entry was recorded for
    Boolean is_first;   // isFirst flag the entry was recorded with
    int result;         // esp adjustment recorded for that (eip, isFirst) pair
} normalize_cache[NCACHE_SIZE];
// Lock taken (with TryEnterCriticalSection) around every access to normalize_cache.
static CRITICAL_SECTION ncache_lock;
// Set by normalize_with_cache() once ncache_lock has been initialized.
static bool ncache_lock_initialized = false;

// Looks up a previously recorded esp adjustment for (*context->p_eip, isFirst).
// On a hit the adjustment is stored in `result`, added to context->esp, and
// true is returned.  On a miss, when the cache lock is currently held by
// someone else, or when DISABLE_CACHING is defined, false is returned and
// nothing is modified.
//
// NOTE(review): the lazy initialization of ncache_lock below is not itself
// guarded; presumably the first call happens before threads can race here --
// confirm.
static bool normalize_with_cache(Frame_Context *context, Boolean isFirst, int &result)
{
#ifdef DISABLE_CACHING
    return false;
#else // DISABLE_CACHING
    // Create the lock on first use.
    if (!ncache_lock_initialized)
    {
        InitializeCriticalSection(&ncache_lock);
        ncache_lock_initialized = true;
    }

    // Never block on the cache: a busy lock is treated as a miss.
    if (!TryEnterCriticalSection(&ncache_lock))
        return false;

    const uint32 eip = *context->p_eip;
    const unsigned slot = eip % NCACHE_SIZE;
    const bool hit = (normalize_cache[slot].eip == eip) &&
                     (normalize_cache[slot].is_first == isFirst);
    if (hit)
    {
        result = normalize_cache[slot].result;
        context->esp += result;
    }

    LeaveCriticalSection(&ncache_lock);
    return hit;
#endif // DISABLE_CACHING
}

// Records `result` as the esp adjustment for (*context->p_eip, isFirst),
// replacing whatever entry occupied that slot.  Does nothing when the cache
// lock is currently held by someone else or when DISABLE_CACHING is defined.
// The lock is not initialized here; that happens in normalize_with_cache().
static void normalize_add_to_cache(Frame_Context *context, Boolean isFirst, int result)
{
#ifndef DISABLE_CACHING
    // Never block on the cache: if the lock is busy, skip the update.
    if (!TryEnterCriticalSection(&ncache_lock))
        return;

    const uint32 eip = *context->p_eip;
    const unsigned slot = eip % NCACHE_SIZE;

    normalize_cache[slot].eip = eip;
    normalize_cache[slot].is_first = isFirst;
    normalize_cache[slot].result = result;

    LeaveCriticalSection(&ncache_lock);
#endif // DISABLE_CACHING
}

// Lookup cache used by unwind_with_cache() / unwind_add_to_cache().
// An eip maps to the single slot (eip % UCACHE_SIZE); storing a new entry
// overwrites whatever was in that slot.
#define UCACHE_SIZE 1024
static struct {
    uint32 eip;             // eip the entry was recorded for
    unsigned num_incoming;  // frame parameters needed to rebuild the ESP_Frame
    unsigned num_spill;
    unsigned num_callee;
    unsigned restore_mask;  // bit (1u << reg) set for each callee-saved register to restore
} unwind_cache[UCACHE_SIZE];
// Lock taken (with TryEnterCriticalSection) around every access to unwind_cache.
static CRITICAL_SECTION ucache_lock;
// Set by unwind_with_cache() once ucache_lock has been initialized.
static bool ucache_lock_initialized = false;

// Tries to unwind one frame using a cached entry for *context->p_eip.
// A cache entry supplies everything the unwind needs besides the context:
// num_incoming, num_spill, num_callee and the callee-saved restore mask.
//
// On a hit, context->esp is advanced past the frame, p_eip is pointed at the
// return-address slot, the callee-saved register pointers selected by the
// restore mask are redirected into the frame, and true is returned.  On a
// miss, when the cache lock is currently held by someone else, or when
// DISABLE_CACHING is defined, false is returned and context is untouched.
//
// NOTE(review): the lazy initialization of ucache_lock below is not itself
// guarded; presumably the first call happens before threads can race here --
// confirm.
static bool unwind_with_cache(Frame_Context *context)
{
#ifdef DISABLE_CACHING
    return false;
#else // DISABLE_CACHING
    // Create the lock on first use.
    if (!ucache_lock_initialized)
    {
        InitializeCriticalSection(&ucache_lock);
        ucache_lock_initialized = true;
    }

    // Never block on the cache: a busy lock is treated as a miss.
    if (!TryEnterCriticalSection(&ucache_lock))
        return false;

    const uint32 eip = *context->p_eip;
    const unsigned slot = eip % UCACHE_SIZE;
    const bool hit = (unwind_cache[slot].eip == eip);
    if (hit)
    {
        unsigned n_incoming = unwind_cache[slot].num_incoming;
        unsigned n_spill = unwind_cache[slot].num_spill;
        unsigned n_callee = unwind_cache[slot].num_callee;
        const unsigned to_restore = unwind_cache[slot].restore_mask;

        ESP_Frame frame(n_incoming, 0, 0, n_spill, n_callee);
        // Extra slots, spill slots, callee-saved slots and the return address.
        unsigned frame_size = (frame.n_extra + frame.n_spill + frame.n_callee + 1) * 4;
        // Callee-saved slots are addressed relative to the esp before the pop.
        const unsigned frame_base = context->esp;

        context->esp += frame_size;
        // The return address sits just below the new esp.
        context->p_eip = (uint32 *)(context->esp - 4);

        if ((to_restore & (1u << ebx_reg)) != 0)
            context->p_ebx = (uint32 *)(frame_base + frame.callee_offset(ebx_reg));
        if ((to_restore & (1u << ebp_reg)) != 0)
            context->p_ebp = (uint32 *)(frame_base + frame.callee_offset(ebp_reg));
        if ((to_restore & (1u << esi_reg)) != 0)
            context->p_esi = (uint32 *)(frame_base + frame.callee_offset(esi_reg));
        if ((to_restore & (1u << edi_reg)) != 0)
            context->p_edi = (uint32 *)(frame_base + frame.callee_offset(edi_reg));
    }

    LeaveCriticalSection(&ucache_lock);
    return hit;
#endif // DISABLE_CACHING
}

// Records the unwind parameters for `eip`, replacing whatever entry occupied
// that slot.  Does nothing when the cache lock is currently held by someone
// else or when DISABLE_CACHING is defined.  The lock is not initialized here;
// that happens in unwind_with_cache().
static void unwind_add_to_cache(uint32 eip, unsigned num_incoming,
                                unsigned num_spill, unsigned num_callee, unsigned restore_mask)
{
#ifndef DISABLE_CACHING
    // Never block on the cache: if the lock is busy, skip the update.
    if (!TryEnterCriticalSection(&ucache_lock))
        return;

    const unsigned slot = eip % UCACHE_SIZE;

    unwind_cache[slot].eip = eip;
    unwind_cache[slot].num_incoming = num_incoming;
    unwind_cache[slot].num_spill = num_spill;
    unwind_cache[slot].num_callee = num_callee;
    unwind_cache[slot].restore_mask = restore_mask;

    LeaveCriticalSection(&ucache_lock);
#endif // DISABLE_CACHING
}

// Lookup cache used by thisaddr_with_cache() / thisaddr_add_to_cache().
// An eip maps to the single slot (eip % TCACHE_SIZE); storing a new entry
// overwrites whatever was in that slot.
#define TCACHE_SIZE 16
static struct {
    uint32 eip;             // eip the entry was recorded for
    Boolean is_first;       // isFirst flag the entry was recorded with
    unsigned this_in_reg;   // cached this_in_reg value for that (eip, isFirst) pair
    int this_offset;        // cached this_offset value for that (eip, isFirst) pair
} thisaddr_cache[TCACHE_SIZE];
// Lock taken (with TryEnterCriticalSection) around every access to thisaddr_cache.
static CRITICAL_SECTION tcache_lock;
// Set by thisaddr_with_cache() once tcache_lock has been initialized.
static bool tcache_lock_initialized = false;

// Looks up the cached location of "this" for (*context->p_eip, isFirst).
// On a hit, this_in_reg and this_offset receive the cached values and true is
// returned.  On a miss, when the cache lock is currently held by someone else,
// or when DISABLE_CACHING is defined, false is returned and the outputs are
// left untouched.
//
// NOTE(review): the lazy initialization of tcache_lock below is not itself
// guarded; presumably the first call happens before threads can race here --
// confirm.
static bool thisaddr_with_cache(Frame_Context *context, Boolean isFirst,
                                unsigned &this_in_reg, int &this_offset)
{
#ifdef DISABLE_CACHING
    return false;
#else // DISABLE_CACHING
    // Create the lock on first use.
    if (!tcache_lock_initialized)
    {
        InitializeCriticalSection(&tcache_lock);
        tcache_lock_initialized = true;
    }

    // Never block on the cache: a busy lock is treated as a miss.
    if (!TryEnterCriticalSection(&tcache_lock))
        return false;

    const uint32 eip = *context->p_eip;
    const unsigned slot = eip % TCACHE_SIZE;
    const bool hit = (thisaddr_cache[slot].eip == eip) &&
                     (thisaddr_cache[slot].is_first == isFirst);
    if (hit)
    {
        this_in_reg = thisaddr_cache[slot].this_in_reg;
        this_offset = thisaddr_cache[slot].this_offset;
    }

    LeaveCriticalSection(&tcache_lock);
    return hit;
#endif // DISABLE_CACHING
}

// Records where "this" lives for (*context->p_eip, isFirst), replacing
// whatever entry occupied that slot.  Does nothing when the cache lock is
// currently held by someone else or when DISABLE_CACHING is defined.  The lock
// is not initialized here; that happens in thisaddr_with_cache().
static void thisaddr_add_to_cache(Frame_Context *context, Boolean isFirst,
                                  unsigned this_in_reg, int this_offset)
{
#ifndef DISABLE_CACHING
    // Never block on the cache: if the lock is busy, skip the update.
    if (!TryEnterCriticalSection(&tcache_lock))
        return;

    const uint32 eip = *context->p_eip;
    const unsigned slot = eip % TCACHE_SIZE;

    thisaddr_cache[slot].eip = eip;
    thisaddr_cache[slot].is_first = isFirst;
    thisaddr_cache[slot].this_in_reg = this_in_reg;
    thisaddr_cache[slot].this_offset = this_offset;

    LeaveCriticalSection(&tcache_lock);
#endif // DISABLE_CACHING
}

// Computes the esp adjustment in effect at *context->p_eip, adds it to
// context->esp and returns it.
//
// The result is taken from the normalize cache when possible.  Otherwise the
// record of the basic block containing the eip is decoded from the method's
// info block and its instruction records are replayed up to the eip.
//
// meth         - method whose info block is decoded.
// context      - frame context; esp is updated in place.
// isFirst      - false when the eip is a return address (the instruction after
//                a call); the lookup then uses eip - 1.
// fwi, this_rec_ptr, bb_start_eip
//              - decoded header state; filled in through init_pointers() when
//                valid_fwi is false, otherwise used as passed in.
// valid_fwi    - in/out: set to true once fwi has been filled in.  It is not
//                touched on a cache hit.
int GC_Map::normalize_esp(Method_Handle meth, Frame_Context *context, Boolean isFirst,
                          fixed_width_info &fwi, unsigned &this_rec_ptr, unsigned &bb_start_eip,
                          bool &valid_fwi)
{
    int esp_adjustment;
    // Fast path: on a hit the cache lookup has already adjusted context->esp.
    if (normalize_with_cache(context, isFirst, esp_adjustment))
    {
        nhits++;
        return esp_adjustment;
    }
    nmisses++;

    uint32 eip = *context->p_eip;
    // The eip that we get either points to the instruction in this method
    // that caused the exception (isFirst=true), or it points to the instruction
    // right after a call (isFirst=false).  If isFirst=false, then the JIT is
    // supposed to pop the outgoing arguments.  Here, I fake it by making it look
    // like the eip points roughly to the previous instruction.
    if (!isFirst)
        eip --;

    Byte *mi = method_get_info_block(meth, O3_Jit_Handle);
    assert(mi != NULL);
    // The first word of the info block is passed as the second constructor argument.
    BitStream bits(mi, *(unsigned *)mi);
//    fixed_width_info fwi;
//    unsigned this_rec_ptr, bb_start_eip;
    if (!valid_fwi)
    {
        init_pointers(bits, eip, meth, fwi, true, this_rec_ptr, bb_start_eip);
        valid_fwi = true;
    }

    // NOTE(review): num_bb is not used in the rest of this function.
    unsigned num_bb = fwi.num_bb;

    // Find the ESP record for this basic block.
    bits.set_offset(this_rec_ptr);
    // Number of instruction records in this block.
    unsigned numrec = bits.read_unsigned(fwi.code_size_bits);
    unsigned esp_adj_bits;
    int esp_adj_bb_entry;
    // A 0 bit means there is no esp information for this block.
    if (bits.read_unsigned(1) == 0)
    {
        assert(numrec == 0);
        esp_adj_bits = 0;
        esp_adj_bb_entry = 0;
    }
    else
    {
        // Adjustments are stored divided by 4, in esp_adj_bits signed bits.
        esp_adj_bits = bits.read_unsigned(5);
        esp_adj_bb_entry = bits.read_signed(esp_adj_bits) << 2;
    }
        // The first block stores a single bit (the "no stack bit vector" flag,
        // kept in the esp position); later blocks store an 8-bit register state.
        unsigned char reg_state;
        if (bb_start_eip == fwi.code_block)
        {
            reg_state = 0;
            if (bits.read_unsigned(1) == 1)
                reg_state |= (1u << esp_reg);
        }
        else
        {
            reg_state = bits.read_unsigned(8);
        }
        // The esp bit set means bv_size == 0, so no size fields follow.
        unsigned bv_size_bits, num_initial_live, bv_size;
        if (reg_state & (1u << esp_reg))
        {
            reg_state &= ~(1u << esp_reg);
            bv_size_bits = 0;
            num_initial_live = 0;
            bv_size = 0;
        }
        else
        {
            bv_size_bits = bits.read_unsigned(5);
            num_initial_live = bits.read_unsigned(bv_size_bits);
            bv_size = bits.read_unsigned(bv_size_bits);
        }
        // Skip the array that maps bit-vector positions to stack offsets; only
        // the stream position matters here.
        unsigned esp_offset_bits = 0;
        if (bv_size > 0)
            esp_offset_bits = bits.read_unsigned(5);
        unsigned esp_offset_array_offset = bits.get_offset();
        bits.set_offset(bits.get_offset() + esp_offset_bits * bv_size);

    // Walk through the BB and apply the changes.
    // A record is applied only if the instruction range it covers ends at or
    // before eip.  Its length is read as part of the loop condition, so the
    // condition itself advances the stream.
    unsigned test_eip = bb_start_eip;
    unsigned rec = 0; // current record
    unsigned inst_len;
    esp_adjustment = esp_adj_bb_entry;
    while (rec < numrec &&
           eip >= test_eip + (inst_len = read_inst_length(bits, fwi.inst_len_bits, (char *)fwi.code_block, test_eip-fwi.code_block)))
    {
        test_eip += inst_len;
        bool reg_change, stk_change, esp_change;
        read_reg_stk_esp_change(reg_change, stk_change, esp_change, bits);
        // Register and stack liveness changes are decoded only to skip over them.
        if (reg_change) // reg_change
        {
            if (bits.read_unsigned(1) == 0) // not eax
            {
                // A register number equal to esp_reg is followed by a full
                // 8-bit register state.
                if (bits.read_unsigned(3) == esp_reg)
                    bits.read_unsigned(8);
            }
        }
        if (stk_change) // stk_change
        {
            bits.read_unsigned(bv_size_bits);
        }
        if (esp_change) // esp change
        {
            // Prefix code: 1 / 01 / 001 / 0001 / 0000 + explicit value.
            if (bits.read_unsigned(1) == 1) // push ref
                esp_adjustment += 4;
            else if (bits.read_unsigned(1) == 1) // push nonref
                esp_adjustment += 4;
            else if (bits.read_unsigned(1) == 1) // call
                esp_adjustment = 0;
            else if (bits.read_unsigned(1) == 1) // pop
                esp_adjustment -= 4;
            else
                // "other": the new adjustment is stored explicitly and replaces the old one.
                esp_adjustment = (bits.read_signed(esp_adj_bits) << 2);
        }
        rec ++;
    }

    // Now adjust esp by adding esp_adjustment.
    context->esp += esp_adjustment;

    // Remember the result for later lookups of the same (eip, isFirst) pair.
    normalize_add_to_cache(context, isFirst, esp_adjustment);

    return esp_adjustment;
}

// Tells whether the callee-saved register with push index `pushno` (0 = first
// pushed) is on the stack when execution is at `eip`.
//
// It is not on the stack while eip is at or before its push instruction, nor
// while eip is after its pop instruction and up to and including the return
// instruction.  Push i is taken to be at first_push_eip + i, and pops occur in
// the reverse order starting at first_pop_eip.
// NOTE(review): this address arithmetic presumably relies on each push/pop
// being a one-byte instruction -- confirm against the emitter.
static bool register_has_been_pushed(GC_Map::fixed_width_info &fwi, unsigned eip, unsigned pushno)
{
    // Debugging aid: making this function always return true demonstrates a
    // bug in the original unwinding code.
    const unsigned push_eip = fwi.first_push_eip + pushno;
    const unsigned pop_eip = fwi.first_pop_eip + fwi.num_callee - pushno - 1;

    // Between method entry and the push instruction.
    const bool not_yet_pushed = (eip <= push_eip);
    // Between the pop instruction and the ret instruction.
    const bool already_popped = (eip > pop_eip) && (eip <= fwi.return_eip);

    return !(not_yet_pushed || already_popped);
}

// Pops the frame of `meth` off `context`.
// Precondition: context->esp has already been normalized.
//
// On return context->esp has been advanced past this frame (including the
// return address), p_eip points at the return-address slot, and p_ebx / p_ebp /
// p_esi / p_edi are redirected into the frame for those callee-saved registers
// that register_has_been_pushed() reports as saved at the current eip.
//
// fwi is filled in through init_pointers() when valid_fwi is false, and
// valid_fwi is then set to true.  Only the header is decoded in that case
// (set_values == false), so this_rec_ptr and bb_start_eip are passed through
// but not assigned here.  None of this happens on an unwind-cache hit.
void GC_Map::unwind(Method_Handle meth, Frame_Context *context,
                    fixed_width_info &fwi, unsigned &this_rec_ptr, unsigned &bb_start_eip,
                    bool &valid_fwi)
{
    // Fast path: the cache lookup performs the whole unwind on a hit.
    if (unwind_with_cache(context))
    {
        uhits++;
        return;
    }
    umisses++;

    Byte *mi = method_get_info_block(meth, O3_Jit_Handle);
    assert(mi != NULL);
    BitStream bits(mi, *(unsigned *)mi);
    if (!valid_fwi)
    {
        init_pointers(bits, *context->p_eip, meth, fwi, false, this_rec_ptr, bb_start_eip);
        valid_fwi = true;
    }

    // callee_masks[n] has a bit set for each of the first n callee-saved
    // registers, in the order ebx, ebp, esi, edi.
    static const unsigned callee_masks[5] = {
        0,
        (1u << ebx_reg),
        (1u << ebx_reg) | (1u << ebp_reg),
        (1u << ebx_reg) | (1u << ebp_reg) | (1u << esi_reg),
        (1u << ebx_reg) | (1u << ebp_reg) | (1u << esi_reg) | (1u << edi_reg)
    };

    unsigned n_incoming = fwi.num_incoming;
    unsigned n_spill = fwi.num_spill;
    unsigned n_callee = fwi.num_callee;
    const unsigned saved = callee_masks[n_callee];

    // The frame size covers n_extra, n_spill, n_callee and the return IP.
    // The in_args are left out on purpose: after unwinding, the caller should
    // see its stack as though the outgoing arguments were still pushed.
    ESP_Frame frame(n_incoming, 0, 0, n_spill, n_callee);
    unsigned frame_size = (frame.n_extra + frame.n_spill + frame.n_callee + 1) * 4;

    // Capture the pre-unwind esp and eip; both are needed after the context
    // has been modified.
    const unsigned frame_base = context->esp;
    const unsigned current_eip = *context->p_eip;

    context->esp += frame_size;

    // The return address sits just below the new esp, so this has to follow
    // the esp adjustment.
    context->p_eip = (uint32 *)(context->esp - 4);

    // Redirect each callee-saved register that is both used by the method and
    // actually on the stack at current_eip, recording which ones were restored.
    unsigned restored = 0;
    if ((saved & (1u << ebx_reg)) != 0 && register_has_been_pushed(fwi, current_eip, 0))
    {
        restored |= (1u << ebx_reg);
        context->p_ebx = (uint32 *)(frame_base + frame.callee_offset(ebx_reg));
    }
    if ((saved & (1u << ebp_reg)) != 0 && register_has_been_pushed(fwi, current_eip, 1))
    {
        restored |= (1u << ebp_reg);
        context->p_ebp = (uint32 *)(frame_base + frame.callee_offset(ebp_reg));
    }
    if ((saved & (1u << esi_reg)) != 0 && register_has_been_pushed(fwi, current_eip, 2))
    {
        restored |= (1u << esi_reg);
        context->p_esi = (uint32 *)(frame_base + frame.callee_offset(esi_reg));
    }
    if ((saved & (1u << edi_reg)) != 0 && register_has_been_pushed(fwi, current_eip, 3))
    {
        restored |= (1u << edi_reg);
        context->p_edi = (uint32 *)(frame_base + frame.callee_offset(edi_reg));
    }

    // Remember the outcome so the next unwind from this eip can use the cache.
    unwind_add_to_cache(current_eip, n_incoming, n_spill, n_callee, restored);
}

// Decodes the GC map record of the basic block that contains the frame's
// eip and, when do_enumeration is true, reports every live reference of the
// frame (stack slots, pushed outgoing arguments and registers) through
// orp_enumerate_root_reference().
//
// This function returns the live register bitmask.  This result is used by
// call_returns_a_reference(), which passes false as the do_enumeration
// argument (and NULL as enum_handle), so nothing may be reported through
// enum_handle unless do_enumeration is true.
//
//   meth            - method whose info block (for O3_Jit_Handle) is decoded.
//   enum_handle     - handed to orp_enumerate_root_reference() for each root.
//   context         - frame context; *context->p_eip and context->esp are read,
//                     and the p_<reg> pointers are reported for live registers.
//   is_first        - when false, eax is removed from the result and ecx/edx
//                     are asserted not to be live.
//   do_enumeration  - when false, only the register bitmask is computed.
//   fwi, this_rec_ptr, bb_start_eip
//                   - filled in by init_pointers() when valid_fwi is false;
//                     otherwise used as supplied by the caller.
//   valid_fwi       - in/out; set to true once init_pointers() has been run.
unsigned GC_Map::enumerate(Method_Handle meth, GC_Enumeration_Handle enum_handle,
                           const Frame_Context *context, Boolean is_first, bool do_enumeration,
                           fixed_width_info &fwi, unsigned &this_rec_ptr, unsigned &bb_start_eip,
                           bool &valid_fwi)
{
    Byte *mi = method_get_info_block(meth, O3_Jit_Handle);
    assert(mi != NULL);
    // The first word of the info block is passed as the second constructor
    // argument (presumably the stream size -- confirm against BitStream).
    BitStream bits(mi, *(unsigned *)mi);
    uint32 eip = *context->p_eip;
    if (!valid_fwi)
    {
        init_pointers(bits, eip, meth, fwi, true, this_rec_ptr, bb_start_eip);
        valid_fwi = true;
    }

    unsigned num_incoming = fwi.num_incoming;
    unsigned num_spill = fwi.num_spill;
    unsigned num_callee = fwi.num_callee;
    ESP_Frame frame(num_incoming, 0, 0, num_spill, num_callee);
    unsigned esp = context->esp;

    // Find the GC record for this basic block.
    bits.set_offset(this_rec_ptr);
    unsigned numrec = bits.read_unsigned(fwi.code_size_bits);
    unsigned esp_adj_bits;
    int esp_adj_bb_entry;
    // A 1-bit flag says whether esp adjustment information follows.  Without
    // it the block is expected to contain no records at all.
    if (bits.read_unsigned(1) == 0)
    {
        assert(numrec == 0);
        esp_adj_bits = 0;
        esp_adj_bb_entry = 0;
    }
    else
    {
        esp_adj_bits = bits.read_unsigned(5);
        // Adjustments are stored in words; convert to bytes.
        esp_adj_bb_entry = bits.read_signed(esp_adj_bits) << 2;
    }
    // Register state at block entry.  The block starting at fwi.code_block
    // stores only a single bit (mapped onto the esp_reg bit); every other
    // block stores a full 8-bit mask.
    unsigned char reg_state;
    if (bb_start_eip == fwi.code_block)
    {
        reg_state = 0;
        if (bits.read_unsigned(1) == 1)
            reg_state |= (1u << esp_reg);
    }
    else
    {
        reg_state = bits.read_unsigned(8);
    }
    // The esp_reg bit doubles as a marker: when set, no stack bit vector
    // description follows in the stream.
    unsigned bv_size_bits, num_initial_live, bv_size;
    if (reg_state & (1u << esp_reg))
    {
        reg_state &= ~(1u << esp_reg);
        bv_size_bits = 0;
        num_initial_live = 0;
        bv_size = 0;
    }
    else
    {
        bv_size_bits = bits.read_unsigned(5);
        num_initial_live = bits.read_unsigned(bv_size_bits);
        bv_size = bits.read_unsigned(bv_size_bits);
    }
    unsigned esp_offset_bits = 0;
    if (bv_size > 0)
        esp_offset_bits = bits.read_unsigned(5);
    if (bv_size > 1000)
        cout << "Huge bv_size: " << bv_size << endl;
    // Remember where the per-slot esp offset array starts, then skip over it
    // to reach the per-instruction records.
    unsigned esp_offset_array_offset = bits.get_offset();
    bits.set_offset(bits.get_offset() + esp_offset_bits * bv_size);

    // My original implementation dynamically allocated a bit vector because there
    // could be an arbitrary number of stack locations.  However, it seems
    // inappropriate to dynamically allocate memory while GC is in progress.
    // As a solution, we can make several passes, and apply the changes to 32 bits
    // at a time.

    unsigned bits_offset = bits.get_offset();
    unsigned old_reg_state = reg_state;
    // One bit per pushed outgoing-argument word that holds a reference.
    unsigned out_bv[8];
    unsigned iter;
    // The body runs at least once (even when bv_size is 0) so that reg_state
    // and out_bv are always computed.  Each pass replays the records from the
    // start of the block and tracks stack slots [iter, iter+32).
    for (iter=0; iter==0 || iter<bv_size; iter+=32)
    {
        bits.set_offset(bits_offset);
        reg_state = old_reg_state;
        unsigned bv,ii;
        for (ii=0; ii<8; ii++) out_bv[ii] = 0;
        // The first num_initial_live slots are live at block entry; build the
        // 32-bit window of that prefix for this pass.
        if (iter >= num_initial_live)
            bv = 0u;
        else if (iter+32 <= num_initial_live)
            bv = ~0u;
        else
            bv = (1u << (num_initial_live - iter)) - 1;
        
        // Walk through the BB and apply the changes.
        unsigned test_eip = bb_start_eip;
        unsigned rec = 0; // current record
        unsigned inst_len;
        int esp_adjustment = esp_adj_bb_entry;
        // Apply the record of every instruction that ends at or before eip.
        while (rec < numrec &&
            eip >= test_eip + (inst_len = read_inst_length(bits, fwi.inst_len_bits, (char *)fwi.code_block, test_eip-fwi.code_block)))
        {
            test_eip += inst_len;
            bool reg_change, stk_change, esp_change;
            read_reg_stk_esp_change(reg_change, stk_change, esp_change, bits);
            if (reg_change) // reg_change
            {
                // eax has a 1-bit short form; other registers use 3 bits.
                unsigned which_reg;
                if (bits.read_unsigned(1) == 1)
                    which_reg = eax_reg;
                else
                    which_reg = bits.read_unsigned(3);
                // esp_reg is an escape: a whole new 8-bit state follows.
                if (which_reg == esp_reg)
                    reg_state = bits.read_unsigned(8);
                else
                    reg_state ^= (1u << which_reg);
            }
            if (stk_change) // stk_change
            {
                // The index must always be consumed from the stream, but it is
                // only applied when it falls in this pass's window.
                unsigned idx;
                idx = bits.read_unsigned(bv_size_bits);
                if (idx >= iter && idx < iter+32)
                    bv ^= (1u << (idx - iter));
            }
            if (esp_change) // esp change
            {
                if (bits.read_unsigned(1) == 1) // push ref
                {
                    esp_adjustment += 4;
                    assert(esp_adjustment >= 4);
                    unsigned outarg = (esp_adjustment - 4) >> 2;
                    // out_bv only has room for 8*32 outgoing argument words.
                    assert(outarg < 8 * 32);
                    out_bv[outarg / 32] |= (1u << (outarg % 32));
                }
                else if (bits.read_unsigned(1) == 1) // push nonref
                    esp_adjustment += 4;
                else if (bits.read_unsigned(1) == 1) // call
                {
                    // A call resets the adjustment and forgets the pushed refs.
                    esp_adjustment = 0;
                    for (ii=0; ii<8; ii++) out_bv[ii] = 0;
                }
                else if (bits.read_unsigned(1) == 1) // pop
                    esp_adjustment -= 4;
                else
                    esp_adjustment = (bits.read_signed(esp_adj_bits) << 2);
            }

            rec ++;
        }
        // Report the live stack slots of this 32-slot window.
        unsigned i;
        for (i=iter; i<iter+32 && i<bv_size; i++)
        {
            if (bv & (1u << (i-iter)))
            {
                // Slot offsets are stored in words relative to esp.
                unsigned val = bits.read_unsigned_at_offset(esp_offset_bits,
                    esp_offset_array_offset + esp_offset_bits * i) << 2;
                int offset = (int)val;
                if (do_enumeration)
                {
#ifdef TRACE_O3
                    cout << "O3 enumerating [esp+0x" << hex << offset << dec << "]" << endl;
#endif // TRACE_O3
                    orp_enumerate_root_reference(enum_handle, (void **)(esp + offset));
                }
            }
        }
    }

    // Report the references pushed as outgoing arguments; they live just below
    // esp.  This must be skipped for a "fake" enumeration, since the caller
    // may have supplied a NULL enum_handle in that case.
    if (do_enumeration)
    {
        unsigned outarg;
        for (outarg=0; outarg<256; outarg++)
        {
            if (out_bv[outarg/32] & (1u << (outarg%32)))
            {
#ifdef TRACE_O3
                cout << "O3 enumerating [esp-0x" << hex << (outarg*4 + 4) << dec << "]" << endl;
#endif // TRACE_O3
                orp_enumerate_root_reference(enum_handle, (void **)(esp - outarg*4 - 4));
            }
        }
    }

    reg_state &= ~(1 << esp_reg);
    // Scratch registers can only be live if is_first is set.
    if (!is_first)
    {
        reg_state &= ~(1 << eax_reg);  // eax contains the method's return value
        assert(!(reg_state & (1 << ecx_reg)));
        assert(!(reg_state & (1 << edx_reg)));
    }
        
    // ENUMERATE(reg) reports the saved location of <reg> (context->p_<reg>)
    // when the corresponding bit is set in reg_state.
#ifdef TRACE_O3
#define STR(x) #x
#define STRINGIZE(x) STR(x)
#define ENUMERATE(reg) do { \
    if (reg_state & (1 << reg##_reg)) { \
    cout << "O3 enumerating " STRINGIZE(reg) " register" << endl; \
    orp_enumerate_root_reference(enum_handle, (void **)context->p_##reg); \
    }} while (0)
#else // TRACE_O3
#define ENUMERATE(reg) do { \
    if (reg_state & (1 << reg##_reg)) { \
    orp_enumerate_root_reference(enum_handle, (void **)context->p_##reg); \
    }} while (0)
#endif // TRACE_O3

    if (do_enumeration)
    {
        ENUMERATE(ebx);
        ENUMERATE(ebp);
        ENUMERATE(esi);
        ENUMERATE(edi);
        ENUMERATE(eax);
        ENUMERATE(ecx);
        ENUMERATE(edx);
    }

    return reg_state;
}


///////////////////////////////////////////////////
//
// The methods invoked through the JIT interface.
//
///////////////////////////////////////////////////

// JIT interface entry point.  Normalizes the esp of the given frame context
// by delegating to normalize_esp(); the decoding state that normalize_esp()
// produces along the way is discarded.
void GC_Map::fix_handler_context(Method_Handle      meth,              // in
                                 Frame_Context     *context,           // in out
                                 Boolean            isFirst            // in
                                 )
{
#ifdef TRACE_O3
    cout << "O3 fix_handler_context: " << class_get_name(method_get_class(meth));
    cout << "." << method_get_name(meth);
    cout << ": eip=0x" << hex << *context->p_eip;
    cout << ", esp=0x" << hex << context->esp << dec << endl;
#endif // TRACE_O3

    // Scratch outputs required by normalize_esp(); not used afterwards.
    bool info_ready = false;
    unsigned rec_offset, block_start;
    fixed_width_info info;

    normalize_esp(meth, context, isFirst, info, rec_offset, block_start, info_ready);
}

// JIT interface entry point.  Normalizes esp for the frame and then unwinds
// it, handing the decoding state produced by normalize_esp() on to unwind().
void GC_Map::unwind_stack_frame(Method_Handle      meth,              // in
                                Frame_Context     *context,           // in out
                                Boolean            is_first           // in
                                )
{
#ifdef TRACE_O3
    cout << "O3 unwind_stack_frame: " << class_get_name(method_get_class(meth));
    cout << "." << method_get_name(meth);
    cout << ": eip=0x" << hex << *context->p_eip;
    cout << ", esp=0x" << hex << context->esp << dec << endl;
#endif // TRACE_O3

    // Decoding state shared between the two steps below.
    bool info_ready = false;
    unsigned rec_offset, block_start;
    fixed_width_info info;

    // Step 1: bring esp to its normalized value.
    normalize_esp(meth, context, is_first, info, rec_offset, block_start, info_ready);

    // Step 2: pop the frame.
    unwind(meth, context, info, rec_offset, block_start, info_ready);
}

// JIT interface entry point.  Normalizes esp, reports the frame's live
// references through enum_handle, and finally unwinds the frame.  The three
// steps share the decoding state first produced by normalize_esp().
void GC_Map::get_root_set_from_stack_frame(Method_Handle         meth,          // in
                                           GC_Enumeration_Handle enum_handle,   // in
                                           Frame_Context        *context,       // in out
                                           Boolean               is_first
                                           )
{
#ifdef TRACE_O3
    cout << "O3 get_root_set_from_stack_frame: " << class_get_name(method_get_class(meth));
    cout << "." << method_get_name(meth);
    cout << ": eip=0x" << hex << *context->p_eip;
    cout << ", esp=0x" << hex << context->esp << dec << endl;
#endif // TRACE_O3

    // Decoding state shared by all three steps.
    bool info_ready = false;
    unsigned rec_offset, block_start;
    fixed_width_info info;

    // Step 1: bring esp to its normalized value.
    normalize_esp(meth, context, is_first, info, rec_offset, block_start, info_ready);

    // Step 2: report the roots (the returned register mask is not needed).
    enumerate(meth, enum_handle, context, is_first, true, info, rec_offset, block_start, info_ready);

    // Step 3: pop the frame.
    unwind(meth, context, info, rec_offset, block_start, info_ready);
}

// Returns the address at which the frame's "this" reference is stored.
//
// The location is described by a (this_in_reg, this_offset) pair that is
// looked up in the thisaddr cache first and decoded from the method's info
// block on a miss.  When the value lives in a callee-saved register, the
// result is the corresponding p_<reg> pointer of the caller's context;
// otherwise it is the normalized esp plus the recorded offset.
uint32 GC_Map::get_address_of_this(Method_Handle      meth,              // in
                                   const Frame_Context     *context,           // in
                                   Boolean            is_first           // in
                                   )
{
    // The eip of the caller's context is what init_pointers() is given on a
    // cache miss.
    uint32 frame_eip = *context->p_eip;

    // Normalize esp on a private copy; the caller's context is const.
    Frame_Context normalized = *context;
    fixed_width_info norm_info;
    unsigned norm_rec_offset, norm_block_start;
    bool norm_info_ready = false;
    normalize_esp(meth, &normalized, is_first, norm_info, norm_rec_offset, norm_block_start, norm_info_ready);

    unsigned in_register;
    int location;
    bool cache_hit = thisaddr_with_cache(&normalized, is_first, in_register, location);
    if (cache_hit)
    {
        thits++;
    }
    else
    {
        tmisses++;
        // Decode the description from the method info block.
        Byte *info_block = method_get_info_block(meth, O3_Jit_Handle);
        assert(info_block != NULL);
        BitStream stream(info_block, *(unsigned *)info_block);
        fixed_width_info decoded;
        unsigned rec_offset, block_start;
        init_pointers(stream, frame_eip, meth, decoded, false, rec_offset, block_start);
        in_register = decoded.this_in_reg;
        location = decoded.this_offset;
    }
    assert(location != -1);

    uint32 result;
    if (!in_register)
    {
        // Stack-resident: offset is relative to the normalized esp.
        result = normalized.esp + location;
    }
    else if (location == ebp_reg)
    {
        result = (uint32) context->p_ebp;
    }
    else if (location == ebx_reg)
    {
        result = (uint32) context->p_ebx;
    }
    else if (location == esi_reg)
    {
        result = (uint32) context->p_esi;
    }
    else if (location == edi_reg)
    {
        result = (uint32) context->p_edi;
    }
    else
    {
        // Only callee-saved registers are expected here.
        assert(0);
        result = 0;
    }

    // Remember freshly decoded descriptions for the next lookup.
    if (!cache_hit)
        thisaddr_add_to_cache(&normalized, is_first, in_register, location);

    return result;
}

// Reports whether eax is marked live in the GC map at the frame's eip.
// enumerate() is run with do_enumeration == false and a NULL enumeration
// handle, purely to obtain its live register bitmask.
Boolean GC_Map::call_returns_a_reference(Method_Handle         meth,              // in
                                         const Frame_Context  *context            // in
                                         )
{
    // Scratch decoding state required by enumerate().
    bool info_ready = false;
    unsigned rec_offset, block_start;
    fixed_width_info info;

    unsigned live_regs = enumerate(meth, NULL, context, TRUE, false, info, rec_offset, block_start, info_ready);

    // Only the eax bit of the mask is of interest.
    unsigned eax_live = live_regs & (1 << eax_reg);
    return (eax_live != 0);
}

// Reports whether the frame of meth can be enumerated at eip.
//
// The method's list of GC-unsafe points is scanned and a diagnostic line is
// printed for every entry that matches, but the check is currently advisory
// only: the function always returns true (the "return false" paths are
// disabled).
//
// NOTE(review): each entry is read with fwi.code_size_bits bits and then has
// fwi.code_block subtracted from it before being compared with eip.  If the
// entries are offsets into the code block, the comparison presumably should
// be "eip - fwi.code_block == entry" -- confirm against the encoder before
// re-enabling the "return false" path.
Boolean GC_Map::can_enumerate(Method_Handle meth, uint32 eip)
{
    Byte *mi = method_get_info_block(meth, O3_Jit_Handle);
    assert(mi != NULL);
    BitStream bits(mi, *(unsigned *)mi);
    fixed_width_info fwi;
    unsigned this_rec_ptr, bb_start_eip;
    // Only fwi is needed below; the record pointer and block start are
    // outputs of init_pointers() that go unused here.
    init_pointers(bits, eip, meth, fwi, false, this_rec_ptr, bb_start_eip);

    // Walk the table of GC-unsafe points.
    bits.set_offset(fwi.gc_unsafe_offset);
    unsigned i;
    for (i=0; i<fwi.num_gc_unsafe; i++)
    {
        if (eip == bits.read_unsigned(fwi.code_size_bits) - fwi.code_block)
        {
            cout << "********** can_enumerate: GC unsafe point 0x" << endl;
            //cout << "********** can_enumerate: GC unsafe point 0x" << hex << eip << dec << endl;
            //return false;
        }
    }
    //return false;
    return true;
}
