auto merge of #10817 : alexcrichton/rust/sched-fix, r=brson
Right now, as pointed out in #8132, it is very easy to introduce a subtle race in the runtime. I believe that this is the cause of the current flakiness on the bots. I have taken the last idea mentioned in that issue which is to use a lock around descheduling and context switching in order to solve this race. Closes #8132
This commit is contained in:
commit
ad6f6cb589
@ -27,8 +27,10 @@ use borrow::{to_uint};
|
||||
use cell::Cell;
|
||||
use rand::{XorShiftRng, Rng, Rand};
|
||||
use iter::range;
|
||||
use unstable::mutex::Mutex;
|
||||
use vec::{OwnedVector};
|
||||
|
||||
|
||||
/// A scheduler is responsible for coordinating the execution of Tasks
|
||||
/// on a single thread. The scheduler runs inside a slightly modified
|
||||
/// Rust Task. When not running this task is stored in the scheduler
|
||||
@ -618,6 +620,12 @@ impl Scheduler {
|
||||
unsafe {
|
||||
let task: *mut Task = Local::unsafe_borrow();
|
||||
(*task).sched.get_mut_ref().run_cleanup_job();
|
||||
|
||||
// See the comments in switch_running_tasks_and_then for why a lock
|
||||
// is acquired here. This is the resumption point and the "bounce"
|
||||
// that it is referring to.
|
||||
(*task).nasty_deschedule_lock.lock();
|
||||
(*task).nasty_deschedule_lock.unlock();
|
||||
}
|
||||
}
|
||||
|
||||
@ -671,6 +679,15 @@ impl Scheduler {
|
||||
/// This passes a Scheduler pointer to the fn after the context switch
|
||||
/// in order to prevent that fn from performing further scheduling operations.
|
||||
/// Doing further scheduling could easily result in infinite recursion.
|
||||
///
|
||||
/// Note that if the closure provided relinquishes ownership of the
|
||||
/// BlockedTask, then it is possible for the task to resume execution before
|
||||
/// the closure has finished executing. This would naturally introduce a
|
||||
/// race if the closure and task shared portions of the environment.
|
||||
///
|
||||
/// This situation is currently prevented, or in other words it is
|
||||
/// guaranteed that this function will not return before the given closure
|
||||
/// has returned.
|
||||
pub fn deschedule_running_task_and_then(mut ~self,
|
||||
f: |&mut Scheduler, BlockedTask|) {
|
||||
// Trickier - we need to get the scheduler task out of self
|
||||
@ -682,10 +699,29 @@ impl Scheduler {
|
||||
|
||||
pub fn switch_running_tasks_and_then(~self, next_task: ~Task,
|
||||
f: |&mut Scheduler, BlockedTask|) {
|
||||
// This is where we convert the BlockedTask-taking closure into one
|
||||
// that takes just a Task
|
||||
self.change_task_context(next_task, |sched, task| {
|
||||
f(sched, BlockedTask::block(task))
|
||||
// And here comes one of the sad moments in which a lock is used in a
|
||||
// core portion of the rust runtime. As always, this is highly
|
||||
// undesirable, so there's a good reason behind it.
|
||||
//
|
||||
// There is an excellent outline of the problem in issue #8132; the summary
|
||||
// is that `f` is executed on a sched task, but its
|
||||
// environment is on the previous task. If `f` relinquishes ownership of
|
||||
// the BlockedTask, then it may introduce a race where `f` is using the
|
||||
// environment as well as the code after the 'deschedule' block.
|
||||
//
|
||||
// The solution we have chosen to adopt for now is to acquire a
|
||||
// task-local lock around this block. The resumption of the task in
|
||||
// context switching will bounce on the lock, thereby waiting for this
|
||||
// block to finish, eliminating the race mentioned above.
|
||||
//
|
||||
// To actually maintain a handle to the lock, we use an unsafe pointer
|
||||
// to it, but we're guaranteed that the task won't exit until we've
|
||||
// unlocked the lock so there's no worry of this memory going away.
|
||||
self.change_task_context(next_task, |sched, mut task| {
|
||||
let lock: *mut Mutex = &mut task.nasty_deschedule_lock;
|
||||
unsafe { (*lock).lock() }
|
||||
f(sched, BlockedTask::block(task));
|
||||
unsafe { (*lock).unlock() }
|
||||
})
|
||||
}
|
||||
|
||||
|
@ -37,6 +37,7 @@ use rt::sched::{Scheduler, SchedHandle};
|
||||
use rt::stack::{StackSegment, StackPool};
|
||||
use send_str::SendStr;
|
||||
use unstable::finally::Finally;
|
||||
use unstable::mutex::Mutex;
|
||||
|
||||
// The Task struct represents all state associated with a rust
|
||||
// task. There are at this point two primary "subtypes" of task,
|
||||
@ -59,6 +60,9 @@ pub struct Task {
|
||||
// Dynamic borrowck debugging info
|
||||
borrow_list: Option<~[BorrowRecord]>,
|
||||
stdout_handle: Option<~Writer>,
|
||||
|
||||
// See the comments in the scheduler about why this is necessary
|
||||
nasty_deschedule_lock: Mutex,
|
||||
}
|
||||
|
||||
pub enum TaskType {
|
||||
@ -193,6 +197,7 @@ impl Task {
|
||||
task_type: SchedTask,
|
||||
borrow_list: None,
|
||||
stdout_handle: None,
|
||||
nasty_deschedule_lock: unsafe { Mutex::new() },
|
||||
}
|
||||
}
|
||||
|
||||
@ -227,6 +232,7 @@ impl Task {
|
||||
task_type: GreenTask(Some(home)),
|
||||
borrow_list: None,
|
||||
stdout_handle: None,
|
||||
nasty_deschedule_lock: unsafe { Mutex::new() },
|
||||
}
|
||||
}
|
||||
|
||||
@ -249,6 +255,7 @@ impl Task {
|
||||
task_type: GreenTask(Some(home)),
|
||||
borrow_list: None,
|
||||
stdout_handle: None,
|
||||
nasty_deschedule_lock: unsafe { Mutex::new() },
|
||||
}
|
||||
}
|
||||
|
||||
@ -391,6 +398,8 @@ impl Drop for Task {
|
||||
// Tear down the per-task state; by the time a Task is dropped it must
// already have been run to completion (`destroyed` set by the runtime).
fn drop(&mut self) {
|
||||
rtdebug!("called drop for a task: {}", borrow::to_uint(self));
|
||||
// A Task must never be dropped while it could still run; everything
// below relies on this invariant.
rtassert!(self.destroyed);
|
||||
|
||||
// SAFETY: `destroyed` was asserted above, so the task has finished
// executing. Per the scheduler's comments on this lock, the task cannot
// exit before the descheduling critical section has unlocked it, so no
// other thread can still be holding or bouncing on the mutex here —
// destroying the raw OS mutex cannot race. (NOTE(review): this relies on
// the nasty_deschedule_lock protocol in sched.rs — verify if that changes.)
unsafe { self.nasty_deschedule_lock.destroy(); }
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user