rust/src/rt/rust_task.h

/**
   The rust task is a cooperatively-scheduled green thread that executes
   Rust code on a segmented stack.

   This class has too many responsibilities:

   * Working with the scheduler loop to signal and respond to state changes,
   and dealing with all the thread synchronization issues involved

   * Managing the dynamically resizing list of Rust stack segments

   * Switching between running Rust code on the Rust segmented stack and
   foreign C code on large stacks owned by the scheduler

   The lifetime of a rust_task object closely mirrors that of a running Rust
   task object, but they are not identical. In particular, the rust_task is an
   atomically reference counted object that might be accessed from arbitrary
   threads at any time. This may keep the task from being destroyed even after
   the task is dead from a Rust task lifecycle perspective.

   FIXME (#2696): The task and the scheduler have an over-complicated,
   undocumented protocol for shutting down the task, hopefully without
   races. It would be easier to reason about if other runtime objects could
   not access the task from arbitrary threads, and didn't need to be
   atomically refcounted.
 */

#ifndef RUST_TASK_H
#define RUST_TASK_H

#include <map>

#include "rust_globals.h"
#include "util/array_list.h"
#include "context.h"
#include "rust_debug.h"
#include "rust_kernel.h"
#include "boxed_region.h"
#include "rust_stack.h"
#include "rust_port_selector.h"
#include "rust_type.h"
#include "rust_sched_loop.h"

// The amount of extra space at the end of each stack segment, available
// to the rt, compiler and dynamic linker for running small functions
// FIXME (#1509): We want this to be 128 but need to slim the red zone calls
// down, disable lazy symbol relocation, and other things we haven't
// discovered yet
//
// One RZ_<os>_<bits> constant exists for every OS/architecture pair that the
// selection ladder below can pick, so RED_ZONE_SIZE never expands to an
// undefined name. RZ_WIN_64 mirrors RZ_WIN_32.
#define RZ_LINUX_32 (1024*2)
#define RZ_LINUX_64 (1024*2)
#define RZ_MAC_32   (1024*20)
#define RZ_MAC_64   (1024*20)
#define RZ_WIN_32   (1024*20)
#define RZ_WIN_64   (1024*20)
#define RZ_BSD_32   (1024*20)
#define RZ_BSD_64   (1024*20)

// Select RED_ZONE_SIZE for the platform being compiled. Only the
// OS/architecture combinations listed here define it.
#ifdef __linux__
#ifdef __i386__
#define RED_ZONE_SIZE RZ_LINUX_32
#endif
#ifdef __x86_64__
#define RED_ZONE_SIZE RZ_LINUX_64
#endif
#endif
#ifdef __APPLE__
#ifdef __i386__
#define RED_ZONE_SIZE RZ_MAC_32
#endif
#ifdef __x86_64__
#define RED_ZONE_SIZE RZ_MAC_64
#endif
#endif
#ifdef __WIN32__
#ifdef __i386__
#define RED_ZONE_SIZE RZ_WIN_32
#endif
#ifdef __x86_64__
#define RED_ZONE_SIZE RZ_WIN_64
#endif
#endif
#ifdef __FreeBSD__
#ifdef __i386__
#define RED_ZONE_SIZE RZ_BSD_32
#endif
#ifdef __x86_64__
#define RED_ZONE_SIZE RZ_BSD_64
#endif
#endif

// Forward declaration only; the definition is not part of this header.
struct rust_box;

// Per-frame glue table, as returned by rust_task::get_frame_glue_fns(fp).
// NOTE(review): the field names suggest these are offsets to the mark, drop
// and reloc glue routines for a frame — confirm against the code that fills
// this structure in.
struct frame_glue_fns {
    uintptr_t mark_glue_off;
    uintptr_t drop_glue_off;
    uintptr_t reloc_glue_off;
};

// std::lib::task::task_result
//
// Stored in task_notification::result; tr_success and tr_failure are the two
// values defined for it here.
typedef unsigned long task_result;
#define tr_success 0
#define tr_failure 1

// Argument bundles taken by the free functions that rust_task declares as
// friends (task_start_wrapper, cleanup_task, reset_stack_limit_on_c_stack and
// new_stack_slow). Only new_stack_args is defined later in this header.
struct spawn_args;
struct cleanup_args;
struct reset_args;
struct new_stack_args;

// std::lib::task::task_notification
//
// since it's currently a unary tag, we only add the fields.
// NOTE(review): presumably this is the message delivered to the port set via
// rust_task::config_notify when rust_task::notify runs — confirm in the
// implementation file.
struct task_notification {
    rust_task_id id;    // id of the task the notification is about
    task_result result; // task_result (tr_success or tr_failure)
};

// C-linkage entry point for failing `task`, carrying the failing expression
// text and its source location (file, line). It is declared a friend of
// rust_task below, whose private begin_failure() is documented as being
// called from here to unwind.
extern "C" void
rust_task_fail(rust_task *task,
               char const *expr,
               char const *file,
               size_t line);

// Runtime representation of a Rust task: its identity, its chain of Rust
// stack segments, the C stack it borrows from the scheduler loop, and the
// lifecycle state guarded by lifecycle_lock. See the comment at the top of
// this file for the overall role of the type.
struct
rust_task : public kernel_owned<rust_task>
{
    // NOTE(review): presumably declares the ref_count member returned by
    // get_ref_count() along with the ref/deref helpers — confirm in the
    // macro definition.
    RUST_ATOMIC_REFCOUNT();

    rust_task_id id;
    bool notify_enabled;
    rust_port_id notify_port;

    context ctx;
    // Current Rust stack segment; new_stack_fast() and prev_stack() move
    // this along the segment list's next/prev links.
    stk_seg *stk;
    uintptr_t runtime_sp;      // Runtime sp while task running.
    rust_scheduler *sched;
    // Scheduler loop this task belongs to; supplies the borrowed C stack and
    // the minimum stack size.
    rust_sched_loop *sched_loop;

    // Fields known only to the runtime.
    rust_kernel *kernel;
    const char *const name;
    int32_t list_index;

    // Rendezvous pointer for receiving data when blocked on a port. If we're
    // trying to read data and no data is available on any incoming channel,
    // we block on the port, and yield control to the scheduler. Since we
    // were not able to read anything, we remember the location where the
    // result should go in the rendezvous_ptr, and let the sender write to
    // that location before waking us up.
    uintptr_t* rendezvous_ptr;

    memory_region local_region;
    boxed_region boxed;

    // Indicates that fail() has been called and we are cleaning up.
    // We use this to suppress the "killed" flag during calls to yield.
    bool unwinding;

    bool propagate_failure;

    uint32_t cc_counter;

    debug::task_debug_info debug;

    // The amount of stack we're using, excluding red zones
    size_t total_stack_sz;

    // Used by rust task management routines in libcore/task.rs.
    void *task_local_data;
    void (*task_local_data_cleanup)(void *data);

private:

    // Protects state, cond, cond_name
    // Protects the killed flag, disallow_kill flag, reentered_rust_stack
    lock_and_signal lifecycle_lock;
    rust_task_state state;
    rust_cond *cond;
    const char *cond_name;

    bool event_reject;
    rust_cond event_cond;
    void *event;

    // Indicates that the task was killed and needs to unwind
    bool killed;
    // Indicates that we've called back into Rust from C
    bool reentered_rust_stack;
    unsigned long disallow_kill;

    // The stack used for running C code, borrowed from the scheduler thread
    stk_seg *c_stack;
    // Stack pointers saved at the most recent switch in each direction:
    // call_on_rust_stack() records next_c_sp and call_on_c_stack() records
    // next_rust_sp, so a nested switch back knows where to resume.
    uintptr_t next_c_sp;
    uintptr_t next_rust_sp;

    rust_port_selector port_selector;

    // Called when the atomic refcount reaches zero
    void delete_this();

    void new_stack_fast(size_t requested_sz);
    void new_stack(size_t requested_sz);
    void free_stack(stk_seg *stk);
    size_t get_next_stack_size(size_t min, size_t current, size_t requested);

    // Hands the borrowed C stack back to the scheduler loop.
    void return_c_stack();

    void transition(rust_task_state src, rust_task_state dst,
                    rust_cond *cond, const char* cond_name);
    void transition_inner(rust_task_state src, rust_task_state dst,
                           rust_cond *cond, const char* cond_name);

    bool must_fail_from_being_killed_inner();
    // Called by rust_task_fail to unwind on failure
    void begin_failure(char const *expr,
                       char const *file,
                       size_t line);

    // Free functions and classes that need access to the private state above.
    friend void task_start_wrapper(spawn_args *a);
    friend void cleanup_task(cleanup_args *a);
    friend void reset_stack_limit_on_c_stack(reset_args *a);
    friend void new_stack_slow(new_stack_args *a);
    friend void rust_task_fail(rust_task *task,
                               char const *expr,
                               char const *file,
                               size_t line);

    friend class rust_port;
    friend class rust_port_selector;
    bool block_inner(rust_cond *on, const char* name);
    void wakeup_inner(rust_cond *from);
    bool blocked_on(rust_cond *cond);

public:

    // Only a pointer to 'name' is kept, so it must live as long as this task.
    rust_task(rust_sched_loop *sched_loop,
              rust_task_state state,
              const char *name,
              size_t init_stack_sz);

    void start(spawn_fn spawnee_fn,
               rust_opaque_box *env,
               void *args);
    void start();
    void assert_is_running();

    void *malloc(size_t sz, const char *tag, type_desc *td=0);
    void *realloc(void *data, size_t sz);
    void free(void *p);

    void set_state(rust_task_state state,
                   rust_cond *cond, const char* cond_name);

    bool block(rust_cond *on, const char* name);
    void wakeup(rust_cond *from);
    void die();

    // Print a backtrace, if the "bt" logging option is on.
    void backtrace();

    // Yields control to the scheduler. Called from the Rust stack
    void yield(bool *killed);

    // Fail this task (assuming caller-on-stack is different task).
    void kill();
    void kill_inner();

    // Indicates that we've been killed and now is an appropriate
    // time to fail as a result
    bool must_fail_from_being_killed();

    // Fail self, assuming caller-on-stack is this task.
    void fail();
    void fail(char const *expr, char const *file, size_t line);

    // Propagate failure to the entire rust runtime.
    // FIXME (#1868) (bblum): maybe this can be done at rust-level?
    void fail_sched_loop();

    frame_glue_fns *get_frame_glue_fns(uintptr_t fp);

    void *calloc(size_t size, const char *tag);

    // Use this function sparingly. Depending on the ref count is generally
    // not at all safe.
    intptr_t get_ref_count() const { return ref_count; }

    void notify(bool success);

    // Stack segment management; the inline definitions follow the struct.
    void *next_stack(size_t stk_sz, void *args_addr, size_t args_sz);
    void prev_stack();
    void record_stack_limit();
    void reset_stack_limit();

    bool on_rust_stack();
    void check_stack_canary();
    void delete_all_stacks();

    void config_notify(rust_port_id port);

    // Run fn_ptr(args) on the other kind of stack; defined inline below.
    void call_on_c_stack(void *args, void *fn_ptr);
    void call_on_rust_stack(void *args, void *fn_ptr);
    // True while a C stack is currently borrowed from the scheduler loop.
    bool have_c_stack() { return c_stack != NULL; }

    rust_port_selector *get_port_selector() { return &port_selector; }

    // Plain accessors: these read the fields directly and do not take
    // lifecycle_lock themselves.
    rust_task_state get_state() { return state; }
    rust_cond *get_cond() { return cond; }
    const char *get_cond_name() { return cond_name; }

    void clear_event_reject() {
        this->event_reject = false;
    }

    void *wait_event(bool *killed);
    void signal_event(void *event);

    void cleanup_after_turn();

    void inhibit_kill();
    void allow_kill();
};

// FIXME (#2697): It would be really nice to be able to get rid of this.
//
// Placement-style array new: `new (task, "tag") T[n]` draws its storage from
// the given task's allocator, labelled with `tag`.
inline void *operator new[](size_t size, rust_task *task, const char *tag) {
    void *storage = task->malloc(size, tag);
    return storage;
}


// Mixin for objects whose storage comes from a task's allocator.
//
// Allocation is done with placement-style new, passing the owning task (by
// pointer or by reference) and an allocation tag. Deallocation reads the
// object's own `task` member, so T must expose a `task` pointer that is
// still readable when operator delete runs.
template <typename T> struct task_owned {
    // Scalar and array forms taking the task by pointer.
    void *operator new(size_t size, rust_task *task, const char *tag) {
        return task->malloc(size, tag);
    }

    void *operator new[](size_t size, rust_task *task, const char *tag) {
        return task->malloc(size, tag);
    }

    // Scalar and array forms taking the task by reference.
    void *operator new(size_t size, rust_task &task, const char *tag) {
        return task.malloc(size, tag);
    }

    void *operator new[](size_t size, rust_task &task, const char *tag) {
        return task.malloc(size, tag);
    }

    // Returns the memory to the task recorded inside the object itself.
    void operator delete(void *ptr) {
        T *self = static_cast<T *>(ptr);
        self->task->free(ptr);
    }
};

// This stuff is on the stack-switching fast path

// Records the pointer to the end of the Rust stack in a platform-
// specific location in the thread control block
extern "C" CDECL void      record_sp_limit(void *limit);
// Reads back the limit stored by record_sp_limit. Callers in this header
// treat a value of 0 as "no limit currently recorded".
extern "C" CDECL uintptr_t get_sp_limit();
// Gets a pointer to the vicinity of the current stack pointer
extern "C" uintptr_t       get_sp();

// This is the function that switches between the C and the Rust stack by
// calling another function with a single void* argument while changing the
// stack pointer. It has a funny name because gdb doesn't normally like to
// backtrace through split stacks (thinks it indicates a bug), but has a
// special case to allow functions named __morestack to move the stack pointer
// around.
//
// args      - the single void* argument handed to the callee
// fn_ptr    - the function to call
// stack_ptr - the stack pointer to switch to for the duration of the call
extern "C" void __morestack(void *args, void *fn_ptr, uintptr_t stack_ptr);

// Turns a saved stack pointer into one that is safe to hand to __morestack:
// backs off by a small fixed amount and then aligns the result downwards.
inline static uintptr_t
sanitize_next_sp(uintptr_t next_sp) {

    // It is not precisely known where the saved stack pointer sits relative
    // to the point where the context switch actually happened, nor how much
    // stack calling __morestack itself needs, so a few extra bytes are
    // skipped here.

    // FIXME (#2698): On the rust stack this potentially puts us quite far
    // into the red zone. Might want to just allocate a new rust stack every
    // time we switch back to rust.
    const uintptr_t slack = 16;
    const uintptr_t padded_sp = next_sp - slack;

    return align_down(padded_sp);
}

// Runs fn_ptr(args) on the C stack, borrowing one from the scheduler loop if
// this task does not already hold one, and restores the Rust stack limit
// afterwards.
//
// args   - opaque argument forwarded to fn_ptr
// fn_ptr - function to invoke on the C stack
inline void
rust_task::call_on_c_stack(void *args, void *fn_ptr) {
    // Too expensive to check
    // assert(on_rust_stack());

    // The shim functions generated by rustc contain the morestack prologue,
    // so we need to let them know they have enough stack.
    record_sp_limit(0);

    // Remember where to resume on the Rust stack; the previous value is kept
    // so that nested switches unwind correctly.
    uintptr_t prev_rust_sp = next_rust_sp;
    next_rust_sp = get_sp();

    bool borrowed_a_c_stack = false;
    uintptr_t sp;
    if (c_stack == NULL) {
        // First entry: take a C stack from the scheduler loop and start at
        // its (aligned) end.
        c_stack = sched_loop->borrow_c_stack();
        next_c_sp = align_down(c_stack->end);
        sp = next_c_sp;
        borrowed_a_c_stack = true;
    } else {
        // Already holding a C stack: continue below the saved C stack
        // pointer.
        sp = sanitize_next_sp(next_c_sp);
    }

    __morestack(args, fn_ptr, sp);

    // Note that we may not actually get here if we threw an exception,
    // in which case we will return the c stack when the exception is caught.
    if (borrowed_a_c_stack) {
        return_c_stack();
    }

    next_rust_sp = prev_rust_sp;

    // Back on the Rust stack: re-establish the limit cleared above.
    record_stack_limit();
}

// Runs fn_ptr(args) on the Rust stack, resuming just below the position
// saved in next_rust_sp, and marks the task as having re-entered Rust for the
// duration of the call.
//
// args   - opaque argument forwarded to fn_ptr
// fn_ptr - function to invoke on the Rust stack
inline void
rust_task::call_on_rust_stack(void *args, void *fn_ptr) {
    // Too expensive to check
    // assert(!on_rust_stack());

    // Because of the hack in the other function that disables the stack limit
    // when entering the C stack, here we restore the stack limit again.
    record_stack_limit();

    assert(get_sp_limit() != 0 && "Stack must be configured");
    assert(next_rust_sp);

    // Note: the previous flag value is read before lifecycle_lock is taken.
    bool had_reentered_rust_stack = reentered_rust_stack;
    {
        // FIXME (#2875) This must be racy. Figure it out.
        scoped_lock with(lifecycle_lock);
        reentered_rust_stack = true;
    }

    // Remember where to resume on the C stack; the previous value is kept so
    // that nested switches unwind correctly.
    uintptr_t prev_c_sp = next_c_sp;
    next_c_sp = get_sp();

    uintptr_t sp = sanitize_next_sp(next_rust_sp);

    // FIXME (#2047): There are times when this is called and needs
    // to be able to throw, and we don't account for that.
    __morestack(args, fn_ptr, sp);

    next_c_sp = prev_c_sp;
    {
        scoped_lock with(lifecycle_lock);
        reentered_rust_stack = had_reentered_rust_stack;
    }

    // Back on the C stack: clear the limit again, as call_on_c_stack does on
    // entry.
    record_sp_limit(0);
}

inline void
rust_task::return_c_stack() {
    // Too expensive to check
    // assert(on_rust_stack());
    assert(c_stack != NULL);
    sched_loop->return_c_stack(c_stack);
    c_stack = NULL;
    next_c_sp = 0;
}

// NB: This runs on the Rust stack
inline void *
rust_task::next_stack(size_t stk_sz, void *args_addr, size_t args_sz) {
    new_stack_fast(stk_sz + args_sz);
    assert(stk->end - (uintptr_t)stk->data >= stk_sz + args_sz
      && "Did not receive enough stack");
    uint8_t *new_sp = (uint8_t*)stk->end;
    // Push the function arguments to the new stack
    new_sp = align_down(new_sp - args_sz);

    // I don't know exactly where the region ends that valgrind needs us
    // to mark accessible. On x86_64 these extra bytes aren't needed, but
    // on i386 we get errors without.
    const int fudge_bytes = 16;
    reuse_valgrind_stack(stk, new_sp - fudge_bytes);

    memcpy(new_sp, args_addr, args_sz);
    record_stack_limit();
    return new_sp;
}

// The amount of stack in a segment available to Rust code: the distance from
// the start of the segment's data area to its end, minus the red zone.
inline size_t
user_stack_size(stk_seg *stk) {
    const uintptr_t data_start = (uintptr_t)&stk->data[0];
    return (size_t)(stk->end - data_start - RED_ZONE_SIZE);
}

// Argument bundle passed (via call_on_c_stack) to new_stack_slow.
struct new_stack_args {
    rust_task *task;     // task that needs a new stack segment
    size_t requested_sz; // size requested by the caller of new_stack_fast
};

// Slow path of stack growth; rust_task::new_stack_fast invokes it on the C
// stack when no cached segment is suitable. Defined outside this header.
void
new_stack_slow(new_stack_args *args);

// NB: This runs on the Rust stack
// This is the new stack fast path, in which we
// reuse the next cached stack segment
inline void
rust_task::new_stack_fast(size_t requested_sz) {
    // The minimum stack size, in bytes, of a Rust stack, excluding red zone
    size_t min_sz = sched_loop->min_stack_size;

    // Try to reuse an existing stack segment
    if (stk != NULL && stk->next != NULL) {
        size_t next_sz = user_stack_size(stk->next);
        if (min_sz <= next_sz && requested_sz <= next_sz) {
            stk = stk->next;
            return;
        }
    }

    new_stack_args args = {this, requested_sz};
    call_on_c_stack(&args, (void*)new_stack_slow);
}

// NB: This runs on the Rust stack
inline void
rust_task::prev_stack() {
    // We're not going to actually delete anything now because that would
    // require switching to the C stack and be costly. Instead we'll just move
    // up the link list and clean up later, either in new_stack or after our
    // turn ends on the scheduler.
    stk = stk->prev;
    record_stack_limit();
}

// Repeats the declaration of record_sp_limit that appears earlier in this
// header (in the stack-switching fast-path section).
extern "C" CDECL void
record_sp_limit(void *limit);

// The LLVM-generated segmented-stack function prolog compares the amount of
// stack needed for each frame to the end-of-stack pointer stored in the
// TCB. As an optimization, when the frame size is less than 256 bytes, it
// will simply compare %esp to the stack limit instead of subtracting the
// frame size. As a result we need our stack limit to account for those 256
// bytes.
const unsigned LIMIT_OFFSET = 256;

// Publishes the stack limit for the current segment: the start of the
// segment's data area plus LIMIT_OFFSET and RED_ZONE_SIZE. Requires a current
// segment whose size, excluding the red zone, is at least LIMIT_OFFSET.
inline void
rust_task::record_stack_limit() {
    assert(stk);
    // Note: the check is >=, although the message says "greater than".
    assert((uintptr_t)stk->end - RED_ZONE_SIZE
      - (uintptr_t)stk->data >= LIMIT_OFFSET
           && "Stack size must be greater than LIMIT_OFFSET");
    record_sp_limit(stk->data + LIMIT_OFFSET + RED_ZONE_SIZE);
}

// Returns the task currently running on this thread. Normally the task is
// recovered from the recorded stack limit; when no limit is recorded the
// scheduler loop's TLS slot is used instead.
inline rust_task* rust_get_current_task() {
    const uintptr_t limit = get_sp_limit();

    // FIXME (#1226) - Because of a hack in upcall_call_shim_on_c_stack this
    // value is sometimes inconveniently set to 0, so we can't use this
    // method of retrieving the task pointer and need to fall back to TLS.
    if (limit == 0) {
        return rust_sched_loop::get_task_tls();
    }

    // The stack limit lives in a quickly-accessible slot in the TCB. It was
    // recorded at a fixed distance past the start of the segment's data
    // area, and the segment header sits immediately before that area, so
    // subtracting those distances yields the header, which holds a pointer
    // to the owning task.
    stk_seg *stk = (stk_seg*)
        (limit - RED_ZONE_SIZE - LIMIT_OFFSET - sizeof(stk_seg));

    // Make sure we've calculated the right address
    ::check_stack_canary(stk);
    assert(stk->task != NULL && "task pointer not in stack structure");
    return stk->task;
}

//
// Local Variables:
// mode: C++
// fill-column: 78;
// indent-tabs-mode: nil
// c-basic-offset: 4
// buffer-file-coding-system: utf-8-unix
// End:
//

#endif /* RUST_TASK_H */