From 5401deb474ca596ffbb23a12f4b70e6def1d0d65 Mon Sep 17 00:00:00 2001 From: Enrico Tassi Date: Fri, 17 Jun 2016 10:48:17 +0200 Subject: remote counter: avoid thread race on sockets (fix #4823) With par: the scenario is this one: coqide --- master ---- proof worker 1 (has no par: steps) ---- proof worker 2 (has a par: step) ---- tac worker 2.1 ---- tac worker 2.2 ---- tac worker 2.3 Actor 2 installs a remote counter for universe levels, that are requested to master. Multiple threads dealing with actors 2.x may need to get values from that counter at the same time. Long story short, in this complex scenario a mutex was missing and the control threads for 2.x were accessing the counter (hence reading/writing to the single socket connecting 2 with master at the same time, "corrupting" the data flow). A better solution would be to have a way to generate unique fresh universe levels locally to a worker. --- lib/remoteCounter.ml | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/remoteCounter.ml b/lib/remoteCounter.ml index 3f1982594a..6cc48c8745 100644 --- a/lib/remoteCounter.ml +++ b/lib/remoteCounter.ml @@ -20,7 +20,7 @@ let new_counter ~name a ~incr ~build = let data = ref (ref a) in counters := (name, Obj.repr data) :: !counters; let m = Mutex.create () in - let mk_thsafe_getter f () = + let mk_thsafe_local_getter f () = (* - slaves must use a remote counter getter, not this one! *) (* - in the main process there is a race condition between slave managers (that are threads) and the main thread, hence the mutex *) @@ -28,11 +28,13 @@ let new_counter ~name a ~incr ~build = Errors.anomaly(Pp.str"Slave processes must install remote counters"); Mutex.lock m; let x = f () in Mutex.unlock m; build x in - let getter = ref(mk_thsafe_getter (fun () -> !data := incr !!data; !!data)) in + let mk_thsafe_remote_getter f () = + Mutex.lock m; let x = f () in Mutex.unlock m; x in + let getter = ref(mk_thsafe_local_getter (fun () -> !data := incr !!data; !!data)) in let installer f = if not (Flags.async_proofs_is_worker ()) then Errors.anomaly(Pp.str"Only slave processes can install a remote counter"); - getter := f in + getter := mk_thsafe_remote_getter f in (fun () -> !getter ()), installer let backup () = !counters -- cgit v1.2.3