
Squashed 'src/ipc/libmultiprocess/' changes from 13424cf2ecc1..47d79db8a552

47d79db8a552 Merge bitcoin-core/libmultiprocess#201: bug: fix mptest hang, ProxyClient<Thread> deadlock in disconnect handler
f15ae9c9b9fb Merge bitcoin-core/libmultiprocess#211: Add .gitignore
4a269b21b8c8 bug: fix ProxyClient<Thread> deadlock if disconnected as IPC call is returning
85df96482c49 Use try_emplace in SetThread instead of threads.find
ca9b380ea91a Use std::optional in ConnThreads to allow shortening locks
9b0799113557 doc: describe ThreadContext struct and synchronization requirements
d60db601ed9b proxy-io.h: add Waiter::m_mutex thread safety annotations
4e365b019a9f ci: Use -Wthread-safety not -Wthread-safety-analysis
15d7bafbb001 Add .gitignore
fe1cd8c76131 Merge bitcoin-core/libmultiprocess#208: ci: Test minimum cmake version in olddeps job
b713a0b7bfbc Merge bitcoin-core/libmultiprocess#207: ci: output CMake version in CI script
0f580397c913 ci: Test minimum cmake version in olddeps job
d603dcc0eef0 ci: output CMake version in CI script

git-subtree-dir: src/ipc/libmultiprocess
git-subtree-split: 47d79db8a5528097b408e18f7b0bae11a6702d26
Author: Ryan Ofsky
Date: 2025-09-17 05:30:43 -04:00
Parent: a334bbe9b7
Commit: 535fa0ad0d
11 changed files with 189 additions and 78 deletions

.gitignore (new file)

@@ -0,0 +1,5 @@
# CMake artifacts
/*build*
# Git artifacts
*.patch


@@ -2,7 +2,7 @@ CI_DESC="CI job using LLVM-based libraries and tools (clang, libc++, clang-tidy,
CI_DIR=build-llvm
NIX_ARGS=(--arg enableLibcxx true)
export CXX=clang++
export CXXFLAGS="-Werror -Wall -Wextra -Wpedantic -Wthread-safety-analysis -Wno-unused-parameter"
export CXXFLAGS="-Werror -Wall -Wextra -Wpedantic -Wthread-safety -Wno-unused-parameter"
CMAKE_ARGS=(
-G Ninja
-DMP_ENABLE_CLANG_TIDY=ON


@@ -1,5 +1,5 @@
CI_DESC="CI job using old Cap'n Proto version"
CI_DESC="CI job using old Cap'n Proto and cmake versions"
CI_DIR=build-olddeps
export CXXFLAGS="-Werror -Wall -Wextra -Wpedantic -Wno-unused-parameter -Wno-error=array-bounds"
NIX_ARGS=(--argstr capnprotoVersion "0.7.1")
NIX_ARGS=(--argstr capnprotoVersion "0.7.1" --argstr cmakeVersion "3.12.4")
BUILD_ARGS=(-k)


@@ -1,7 +1,7 @@
CI_DESC="CI job running ThreadSanitizer"
CI_DIR=build-sanitize
export CXX=clang++
export CXXFLAGS="-ggdb -Werror -Wall -Wextra -Wpedantic -Wthread-safety-analysis -Wno-unused-parameter -fsanitize=thread"
export CXXFLAGS="-ggdb -Werror -Wall -Wextra -Wpedantic -Wthread-safety -Wno-unused-parameter -fsanitize=thread"
CMAKE_ARGS=()
BUILD_ARGS=(-k -j4)
BUILD_TARGETS=(mptest)


@@ -17,6 +17,21 @@ fi
[ -n "${CI_CLEAN-}" ] && rm -rf "${CI_DIR}"
cmake -B "$CI_DIR" "${CMAKE_ARGS[@]+"${CMAKE_ARGS[@]}"}"
cmake --build "$CI_DIR" -t "${BUILD_TARGETS[@]}" -- "${BUILD_ARGS[@]+"${BUILD_ARGS[@]}"}"
ctest --test-dir "$CI_DIR" --output-on-failure
cmake --version
cmake_ver=$(cmake --version | awk '/version/{print $3; exit}')
ver_ge() { [ "$(printf '%s\n' "$2" "$1" | sort -V | head -n1)" = "$2" ]; }
src_dir=$PWD
mkdir -p "$CI_DIR"
cd "$CI_DIR"
cmake "$src_dir" "${CMAKE_ARGS[@]+"${CMAKE_ARGS[@]}"}"
if ver_ge "$cmake_ver" "3.15"; then
cmake --build . -t "${BUILD_TARGETS[@]}" -- "${BUILD_ARGS[@]+"${BUILD_ARGS[@]}"}"
else
# Older versions of cmake can only build one target at a time with --target,
# and do not support the -t shortcut
for t in "${BUILD_TARGETS[@]}"; do
cmake --build . --target "$t" -- "${BUILD_ARGS[@]+"${BUILD_ARGS[@]}"}"
done
fi
ctest --output-on-failure


@@ -66,8 +66,6 @@ struct ProxyClient<Thread> : public ProxyClientBase<Thread, ::capnp::Void>
ProxyClient(const ProxyClient&) = delete;
~ProxyClient();
void setDisconnectCallback(const std::function<void()>& fn);
//! Reference to callback function that is run if there is a sudden
//! disconnect and the Connection object is destroyed before this
//! ProxyClient<Thread> object. The callback will destroy this object and
@@ -285,16 +283,16 @@ struct Waiter
template <typename Fn>
void post(Fn&& fn)
{
const std::unique_lock<std::mutex> lock(m_mutex);
const Lock lock(m_mutex);
assert(!m_fn);
m_fn = std::forward<Fn>(fn);
m_cv.notify_all();
}
template <class Predicate>
void wait(std::unique_lock<std::mutex>& lock, Predicate pred)
void wait(Lock& lock, Predicate pred)
{
m_cv.wait(lock, [&] {
m_cv.wait(lock.m_lock, [&]() MP_REQUIRES(m_mutex) {
// Important for this to be "while (m_fn)", not "if (m_fn)" to avoid
// a lost-wakeup bug. A new m_fn and m_cv notification might be sent
// after the fn() call and before the lock.lock() call in this loop
@@ -317,9 +315,9 @@ struct Waiter
//! mutexes than necessary. This mutex can be held at the same time as
//! EventLoop::m_mutex as long as Waiter::mutex is locked first and
//! EventLoop::m_mutex is locked second.
std::mutex m_mutex;
Mutex m_mutex;
std::condition_variable m_cv;
std::optional<kj::Function<void()>> m_fn;
std::optional<kj::Function<void()>> m_fn MP_GUARDED_BY(m_mutex);
};
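
For illustration, here is a minimal self-contained sketch of the post/wait handshake described above, using a plain std::mutex and std::condition_variable instead of the annotated Mutex/Lock wrappers (MiniWaiter and the main() driver are hypothetical, not part of the library):

#include <cassert>
#include <condition_variable>
#include <functional>
#include <mutex>
#include <optional>
#include <thread>

struct MiniWaiter
{
    std::mutex m_mutex;
    std::condition_variable m_cv;
    std::optional<std::function<void()>> m_fn;

    void post(std::function<void()> fn)
    {
        const std::unique_lock<std::mutex> lock(m_mutex);
        assert(!m_fn); // Previous callback must have been consumed already.
        m_fn = std::move(fn);
        m_cv.notify_all();
    }

    template <class Predicate>
    void wait(std::unique_lock<std::mutex>& lock, Predicate pred)
    {
        m_cv.wait(lock, [&] {
            // "while", not "if": another m_fn can be posted between fn()
            // returning and the lock being reacquired below.
            while (m_fn) {
                auto fn = std::move(*m_fn);
                m_fn.reset();
                lock.unlock();
                fn();
                lock.lock();
            }
            return pred();
        });
    }
};

int main()
{
    MiniWaiter waiter;
    bool done = false;
    std::thread worker([&] {
        std::unique_lock<std::mutex> lock(waiter.m_mutex);
        waiter.wait(lock, [&] { return done; }); // executes posted callbacks while waiting
    });
    waiter.post([] { /* runs on the worker thread with the lock released */ });
    {
        const std::unique_lock<std::mutex> lock(waiter.m_mutex);
        done = true;
        waiter.m_cv.notify_all();
    }
    worker.join();
}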
//! Object holding network & rpc state associated with either an incoming server
@@ -544,29 +542,73 @@ void ProxyServerBase<Interface, Impl>::invokeDestroy()
CleanupRun(m_context.cleanup_fns);
}
using ConnThreads = std::map<Connection*, ProxyClient<Thread>>;
//! Map from Connection to local or remote thread handle which will be used over
//! that connection. This map will typically only contain one entry, but can
//! contain multiple if a single thread makes IPC calls over multiple
//! connections. A std::optional value type is used to avoid the map needing to
//! be locked while ProxyClient<Thread> objects are constructed, see
//! ThreadContext "Synchronization note" below.
using ConnThreads = std::map<Connection*, std::optional<ProxyClient<Thread>>>;
using ConnThread = ConnThreads::iterator;
// Retrieve ProxyClient<Thread> object associated with this connection from a
// map, or create a new one and insert it into the map. Return map iterator and
// inserted bool.
std::tuple<ConnThread, bool> SetThread(ConnThreads& threads, std::mutex& mutex, Connection* connection, const std::function<Thread::Client()>& make_thread);
std::tuple<ConnThread, bool> SetThread(GuardedRef<ConnThreads> threads, Connection* connection, const std::function<Thread::Client()>& make_thread);
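
As a rough sketch of the two-phase insertion the std::optional value type allows (hypothetical names and an int key standing in for Connection*, not the library's API): the entry is created empty under the mutex with try_emplace, and the value is constructed only after the lock is released:

#include <map>
#include <mutex>
#include <optional>
#include <string>
#include <tuple>

std::mutex g_mutex;
// Stand-in for ConnThreads, with int keys instead of Connection pointers.
std::map<int, std::optional<std::string>> g_threads;

std::string& SetEntry(int key)
{
    decltype(g_threads)::iterator it;
    bool inserted;
    {
        const std::lock_guard<std::mutex> lock(g_mutex);
        std::tie(it, inserted) = g_threads.try_emplace(key);
    }
    if (inserted) {
        // Construct the value outside the lock. In the real code this is
        // where the ProxyClient<Thread> constructor runs, so the map mutex
        // does not need to be held during construction.
        it->second.emplace("value for key " + std::to_string(key));
    }
    // If not inserted, the entry already existed and was fully constructed;
    // the real code relies on all mutations happening on one event loop
    // thread to guarantee this.
    return *it->second;
}

int main()
{
    return SetEntry(7).empty() ? 1 : 0;
}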
//! The thread_local ThreadContext g_thread_context struct provides information
//! about individual threads and a way of communicating between them. Because
//! it's a thread local struct, each ThreadContext instance is initialized by
//! the thread that owns it.
//!
//! ThreadContext is used for any client threads created externally which make
//! IPC calls, and for server threads created by
//! ProxyServer<ThreadMap>::makeThread() which execute IPC calls for clients.
//!
//! In both cases, the struct holds information like the thread name, and a
//! Waiter object where the EventLoop can post incoming IPC requests to execute
//! on the thread. The struct also holds ConnThread maps associating the thread
//! with local and remote ProxyClient<Thread> objects.
struct ThreadContext
{
//! Identifying string for debug.
std::string thread_name;
//! Waiter object used to allow client threads blocked waiting for a server
//! response to execute callbacks made from the client's corresponding
//! server thread.
//! Waiter object used to allow remote clients to execute code on this
//! thread. For server threads created by
//! ProxyServer<ThreadMap>::makeThread(), this is initialized in that
//! function. Otherwise, for client threads created externally, this is
//! initialized the first time the thread tries to make an IPC call. Having
//! a waiter is necessary for threads making IPC calls in case a server they
//! are calling expects them to execute a callback during the call, before
//! it sends a response.
//!
//! For IPC client threads, the Waiter pointer is never cleared and the Waiter
//! just gets destroyed when the thread does. For server threads created by
//! makeThread(), this pointer is set to null in the ~ProxyServer<Thread> as
//! a signal for the thread to exit and destroy itself. In both cases, the
//! same Waiter object is used across different calls and only created and
//! destroyed once for the lifetime of the thread.
std::unique_ptr<Waiter> waiter = nullptr;
//! When client is making a request to a server, this is the
//! `callbackThread` argument it passes in the request, used by the server
//! in case it needs to make callbacks into the client that need to execute
//! while the client is waiting. This will be set to a local thread object.
ConnThreads callback_threads;
//!
//! Synchronization note: The callback_threads and request_threads maps are
//! only ever accessed internally by this thread's destructor and externally
//! by Cap'n Proto event loop threads. Since it's possible for IPC client
//! threads to make calls over different connections that could have
//! different event loops, these maps are guarded by Waiter::m_mutex in case
//! different event loop threads add or remove map entries simultaneously.
//! However, individual ProxyClient<Thread> objects in the maps will only be
//! associated with one event loop and guarded by EventLoop::m_mutex. So
//! Waiter::m_mutex does not need to be held while accessing individual
//! ProxyClient<Thread> instances, and may even need to be released to
//! respect lock order and avoid locking Waiter::m_mutex before
//! EventLoop::m_mutex.
ConnThreads callback_threads MP_GUARDED_BY(waiter->m_mutex);
//! When client is making a request to a server, this is the `thread`
//! argument it passes in the request, used to control which thread on
@@ -575,7 +617,9 @@ struct ThreadContext
//! by makeThread. If a client call is being made from a thread currently
//! handling a server request, this will be set to the `callbackThread`
//! request thread argument passed in that request.
ConnThreads request_threads;
//!
//! Synchronization note: \ref callback_threads note applies here as well.
ConnThreads request_threads MP_GUARDED_BY(waiter->m_mutex);
//! Whether this thread is a capnp event loop thread. Not really used except
//! to assert false if there's an attempt to execute a blocking operation


@@ -617,7 +617,7 @@ void clientInvoke(ProxyClient& proxy_client, const GetRequest& get_request, Fiel
const char* disconnected = nullptr;
proxy_client.m_context.loop->sync([&]() {
if (!proxy_client.m_context.connection) {
const std::unique_lock<std::mutex> lock(thread_context.waiter->m_mutex);
const Lock lock(thread_context.waiter->m_mutex);
done = true;
disconnected = "IPC client method called after disconnect.";
thread_context.waiter->m_cv.notify_all();
@@ -644,7 +644,7 @@ void clientInvoke(ProxyClient& proxy_client, const GetRequest& get_request, Fiel
} catch (...) {
exception = std::current_exception();
}
const std::unique_lock<std::mutex> lock(thread_context.waiter->m_mutex);
const Lock lock(thread_context.waiter->m_mutex);
done = true;
thread_context.waiter->m_cv.notify_all();
},
@@ -656,13 +656,13 @@ void clientInvoke(ProxyClient& proxy_client, const GetRequest& get_request, Fiel
proxy_client.m_context.loop->logPlain()
<< "{" << thread_context.thread_name << "} IPC client exception " << kj_exception;
}
const std::unique_lock<std::mutex> lock(thread_context.waiter->m_mutex);
const Lock lock(thread_context.waiter->m_mutex);
done = true;
thread_context.waiter->m_cv.notify_all();
}));
});
std::unique_lock<std::mutex> lock(thread_context.waiter->m_mutex);
Lock lock(thread_context.waiter->m_mutex);
thread_context.waiter->wait(lock, [&done]() { return done; });
if (exception) std::rethrow_exception(exception);
if (!kj_exception.empty()) proxy_client.m_context.loop->raise() << kj_exception;


@@ -25,7 +25,7 @@ void CustomBuildField(TypeList<>,
// Also store the Thread::Client reference in the callback_threads map so
// future calls over this connection can reuse it.
auto [callback_thread, _]{SetThread(
thread_context.callback_threads, thread_context.waiter->m_mutex, &connection,
GuardedRef{thread_context.waiter->m_mutex, thread_context.callback_threads}, &connection,
[&] { return connection.m_threads.add(kj::heap<ProxyServer<Thread>>(thread_context, std::thread{})); })};
// Call remote ThreadMap.makeThread function so server will create a
@@ -43,12 +43,12 @@ void CustomBuildField(TypeList<>,
return request.send().getResult(); // Nonblocking due to capnp request pipelining.
}};
auto [request_thread, _1]{SetThread(
thread_context.request_threads, thread_context.waiter->m_mutex,
GuardedRef{thread_context.waiter->m_mutex, thread_context.request_threads},
&connection, make_request_thread)};
auto context = output.init();
context.setThread(request_thread->second.m_client);
context.setCallbackThread(callback_thread->second.m_client);
context.setThread(request_thread->second->m_client);
context.setCallbackThread(callback_thread->second->m_client);
}
//! PassField override for mp.Context arguments. Return asynchronously and call
@@ -89,29 +89,39 @@ auto PassField(Priority<1>, TypeList<>, ServerContext& server_context, const Fn&
// need to update the map.
auto& thread_context = g_thread_context;
auto& request_threads = thread_context.request_threads;
auto [request_thread, inserted]{SetThread(
request_threads, thread_context.waiter->m_mutex,
server.m_context.connection,
[&] { return context_arg.getCallbackThread(); })};
ConnThread request_thread;
bool inserted;
server.m_context.loop->sync([&] {
std::tie(request_thread, inserted) = SetThread(
GuardedRef{thread_context.waiter->m_mutex, request_threads}, server.m_context.connection,
[&] { return context_arg.getCallbackThread(); });
});
// If an entry was inserted into the requests_threads map,
// If an entry was inserted into the request_threads map,
// remove it after calling fn.invoke. If an entry was not
// inserted, one already existed, meaning this must be a
// recursive call (IPC call calling back to the caller which
// makes another IPC call), so avoid modifying the map.
const bool erase_thread{inserted};
KJ_DEFER(if (erase_thread) {
std::unique_lock<std::mutex> lock(thread_context.waiter->m_mutex);
// Call erase here with a Connection* argument instead
// of an iterator argument, because the `request_thread`
// iterator may be invalid if the connection is closed
// during this function call. More specifically, the
// iterator may be invalid because SetThread adds a
// cleanup callback to the Connection destructor that
// erases the thread from the map, and also because the
// ProxyServer<Thread> destructor calls
// request_threads.clear().
request_threads.erase(server.m_context.connection);
// Erase the request_threads entry on the event loop
// thread with loop->sync(), so if the connection is
// broken there is not a race between this thread and
// the disconnect handler trying to destroy the thread
// client object.
server.m_context.loop->sync([&] {
// Look up the thread again without using existing
// iterator since entry may no longer be there after
// a disconnect. Destroy node after releasing
// Waiter::m_mutex, so the ProxyClient<Thread>
// destructor is able to use EventLoop::mutex
// without violating lock order.
ConnThreads::node_type removed;
{
Lock lock(thread_context.waiter->m_mutex);
removed = request_threads.extract(server.m_context.connection);
}
});
});
fn.invoke(server_context, args...);
}
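
The extract-then-destroy step above can be shown in isolation (Client and RemoveClient are hypothetical names): std::map::extract unlinks the node while the map mutex is held, and the node's destructor runs only after the lock is released, so a value whose destructor needs other locks cannot violate lock order:

#include <map>
#include <mutex>

struct Client
{
    ~Client() { /* in the real code, may need EventLoop-style mutexes */ }
};

std::mutex g_map_mutex;
std::map<int, Client> g_clients;

void RemoveClient(int key)
{
    std::map<int, Client>::node_type removed;
    {
        const std::lock_guard<std::mutex> lock(g_map_mutex);
        removed = g_clients.extract(key); // unlinks the node, no destructor call
    }
    // `removed` goes out of scope here, destroying the Client only after
    // g_map_mutex has been released.
}

int main()
{
    g_clients.emplace(1, Client{});
    RemoveClient(1);
}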


@@ -182,6 +182,17 @@ public:
std::unique_lock<std::mutex> m_lock;
};
template<typename T>
struct GuardedRef
{
Mutex& mutex;
T& ref MP_GUARDED_BY(mutex);
};
// CTAD for Clang 16: GuardedRef{mutex, x} -> GuardedRef<decltype(x)>
template <class U>
GuardedRef(Mutex&, U&) -> GuardedRef<U>;
//! Analog to std::lock_guard that unlocks instead of locks.
template <typename Lock>
struct UnlockGuard
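
A hedged sketch of how a GuardedRef-style parameter carries a guarded reference across a function boundary under Clang's -Wthread-safety analysis (simplified stand-ins for the library's annotated Mutex and MP_GUARDED_BY macro; Append and main are hypothetical):

#include <mutex>
#include <vector>

// Simplified annotated mutex wrapper, roughly like the library's Mutex.
class __attribute__((capability("mutex"))) Mutex
{
public:
    void lock() __attribute__((acquire_capability())) { m_mutex.lock(); }
    void unlock() __attribute__((release_capability())) { m_mutex.unlock(); }

private:
    std::mutex m_mutex;
};

template <typename T>
struct GuardedRef
{
    Mutex& mutex;
    T& ref __attribute__((guarded_by(mutex)));
};

// CTAD guide, as in the diff above, so GuardedRef{m, x} deduces GuardedRef<decltype(x)>.
template <class U>
GuardedRef(Mutex&, U&) -> GuardedRef<U>;

void Append(GuardedRef<std::vector<int>> values, int value)
{
    values.mutex.lock();
    values.ref.push_back(value); // analysis sees values.mutex held here
    values.mutex.unlock();
}

int main()
{
    Mutex m;
    std::vector<int> v;
    Append(GuardedRef{m, v}, 1); // CTAD: GuardedRef<std::vector<int>>
}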


@@ -3,6 +3,7 @@
, enableLibcxx ? false # Whether to use libc++ toolchain and libraries instead of libstdc++
, minimal ? false # Whether to create minimal shell without extra tools (faster when cross compiling)
, capnprotoVersion ? null
, cmakeVersion ? null
}:
let
@@ -37,12 +38,23 @@ let
capnproto = capnprotoBase.override (lib.optionalAttrs enableLibcxx { clangStdenv = llvm.libcxxStdenv; });
clang = if enableLibcxx then llvm.libcxxClang else llvm.clang;
clang-tools = llvm.clang-tools.override { inherit enableLibcxx; };
cmakeHashes = {
"3.12.4" = "sha256-UlVYS/0EPrcXViz/iULUcvHA5GecSUHYS6raqbKOMZQ=";
};
cmakeBuild = if cmakeVersion == null then pkgs.cmake else (pkgs.cmake.overrideAttrs (old: {
version = cmakeVersion;
src = pkgs.fetchurl {
url = "https://cmake.org/files/v${lib.versions.majorMinor cmakeVersion}/cmake-${cmakeVersion}.tar.gz";
hash = lib.attrByPath [cmakeVersion] "" cmakeHashes;
};
patches = [];
})).override { isMinimalBuild = true; };
in crossPkgs.mkShell {
buildInputs = [
capnproto
];
nativeBuildInputs = with pkgs; [
cmake
cmakeBuild
include-what-you-use
ninja
] ++ lib.optionals (!minimal) [


@@ -12,6 +12,7 @@
#include <atomic>
#include <capnp/capability.h>
#include <capnp/common.h> // IWYU pragma: keep
#include <capnp/rpc.h>
#include <condition_variable>
#include <functional>
@@ -25,7 +26,6 @@
#include <kj/memory.h>
#include <map>
#include <memory>
#include <mutex>
#include <optional>
#include <stdexcept>
#include <string>
@@ -81,6 +81,11 @@ ProxyContext::ProxyContext(Connection* connection) : connection(connection), loo
Connection::~Connection()
{
// Connection destructor is always called on the event loop thread. If this
// is a local disconnect, it will trigger I/O, so this needs to run on the
// event loop thread, and if there was a remote disconnect, this is called
// by an onDisconnect callback directly from the event loop thread.
assert(std::this_thread::get_id() == m_loop->m_thread_id);
// Shut down RPC system first, since this will garbage collect any
// ProxyServer objects that were not freed before the connection was closed.
// Typically all ProxyServer objects associated with this connection will be
@@ -156,6 +161,9 @@ CleanupIt Connection::addSyncCleanup(std::function<void()> fn)
void Connection::removeSyncCleanup(CleanupIt it)
{
// Require cleanup functions to be removed on the event loop thread to avoid
// needing to deal with them being removed in the middle of a disconnect.
assert(std::this_thread::get_id() == m_loop->m_thread_id);
const Lock lock(m_loop->m_mutex);
m_sync_cleanup_fns.erase(it);
}
@@ -305,29 +313,34 @@ bool EventLoop::done() const
return m_num_clients == 0 && m_async_fns->empty();
}
std::tuple<ConnThread, bool> SetThread(ConnThreads& threads, std::mutex& mutex, Connection* connection, const std::function<Thread::Client()>& make_thread)
std::tuple<ConnThread, bool> SetThread(GuardedRef<ConnThreads> threads, Connection* connection, const std::function<Thread::Client()>& make_thread)
{
const std::unique_lock<std::mutex> lock(mutex);
auto thread = threads.find(connection);
if (thread != threads.end()) return {thread, false};
thread = threads.emplace(
std::piecewise_construct, std::forward_as_tuple(connection),
std::forward_as_tuple(make_thread(), connection, /* destroy_connection= */ false)).first;
thread->second.setDisconnectCallback([&threads, &mutex, thread] {
// Note: it is safe to use the `thread` iterator in this cleanup
// function, because the iterator would only be invalid if the map entry
// was removed, and if the map entry is removed the ProxyClient<Thread>
// destructor unregisters the cleanup.
assert(std::this_thread::get_id() == connection->m_loop->m_thread_id);
ConnThread thread;
bool inserted;
{
const Lock lock(threads.mutex);
std::tie(thread, inserted) = threads.ref.try_emplace(connection);
}
if (inserted) {
thread->second.emplace(make_thread(), connection, /* destroy_connection= */ false);
thread->second->m_disconnect_cb = connection->addSyncCleanup([threads, thread] {
// Note: it is safe to use the `thread` iterator in this cleanup
// function, because the iterator would only be invalid if the map entry
// was removed, and if the map entry is removed the ProxyClient<Thread>
// destructor unregisters the cleanup.
// Connection is being destroyed before thread client is, so reset
// thread client m_disconnect_cb member so thread client destructor does not
// try to unregister this callback after connection is destroyed.
// Remove connection pointer about to be destroyed from the map
const std::unique_lock<std::mutex> lock(mutex);
thread->second.m_disconnect_cb.reset();
threads.erase(thread);
});
return {thread, true};
// Connection is being destroyed before thread client is, so reset
// thread client m_disconnect_cb member so thread client destructor does not
// try to unregister this callback after connection is destroyed.
thread->second->m_disconnect_cb.reset();
// Remove connection pointer about to be destroyed from the map
const Lock lock(threads.mutex);
threads.ref.erase(thread);
});
}
return {thread, inserted};
}
ProxyClient<Thread>::~ProxyClient()
@@ -336,17 +349,18 @@ ProxyClient<Thread>::~ProxyClient()
// cleanup callback that was registered to handle the connection being
// destroyed before the thread being destroyed.
if (m_disconnect_cb) {
m_context.connection->removeSyncCleanup(*m_disconnect_cb);
// Remove disconnect callback on the event loop thread with
// loop->sync(), so if the connection is broken there is not a race
// between this thread trying to remove the callback and the disconnect
// handler attempting to call it.
m_context.loop->sync([&]() {
if (m_disconnect_cb) {
m_context.connection->removeSyncCleanup(*m_disconnect_cb);
}
});
}
}
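
To sketch the serialization idea behind this destructor (ToyLoop is a hypothetical stand-in, not the library's EventLoop API): funneling both the unregister step and the disconnect handler onto a single loop thread means the two can never interleave:

#include <condition_variable>
#include <functional>
#include <mutex>
#include <optional>
#include <thread>

// Toy single-thread executor: sync() runs a functor on the loop thread and
// blocks the caller until it completes, like the loop->sync() calls above.
class ToyLoop
{
public:
    ToyLoop() : m_thread([this] { run(); }) {}
    ~ToyLoop()
    {
        sync([this] { m_done = true; });
        m_thread.join();
    }
    void sync(std::function<void()> fn)
    {
        std::unique_lock<std::mutex> lock(m_mutex);
        m_fn = std::move(fn);
        m_cv.notify_all();
        m_cv.wait(lock, [this] { return !m_fn; });
    }

private:
    void run()
    {
        std::unique_lock<std::mutex> lock(m_mutex);
        while (!m_done) {
            m_cv.wait(lock, [this] { return bool(m_fn); });
            (*m_fn)(); // runs with the lock held; fine for this toy
            m_fn.reset();
            m_cv.notify_all();
        }
    }

    std::mutex m_mutex;
    std::condition_variable m_cv;
    std::optional<std::function<void()>> m_fn;
    bool m_done = false;
    std::thread m_thread;
};

int main()
{
    ToyLoop loop;
    std::optional<int> disconnect_cb{42}; // stand-in for m_disconnect_cb
    // Unregister on the loop thread: a disconnect handler running on the same
    // thread either already cleared disconnect_cb or runs after this lambda.
    loop.sync([&] { if (disconnect_cb) disconnect_cb.reset(); });
}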
void ProxyClient<Thread>::setDisconnectCallback(const std::function<void()>& fn)
{
assert(fn);
assert(!m_disconnect_cb);
m_disconnect_cb = m_context.connection->addSyncCleanup(fn);
}
ProxyServer<Thread>::ProxyServer(ThreadContext& thread_context, std::thread&& thread)
: m_thread_context(thread_context), m_thread(std::move(thread))
{
@@ -364,7 +378,7 @@ ProxyServer<Thread>::~ProxyServer()
assert(m_thread_context.waiter.get());
std::unique_ptr<Waiter> waiter;
{
const std::unique_lock<std::mutex> lock(m_thread_context.waiter->m_mutex);
const Lock lock(m_thread_context.waiter->m_mutex);
//! Reset thread context waiter pointer, as shutdown signal for done
//! lambda passed as waiter->wait() argument in makeThread code below.
waiter = std::move(m_thread_context.waiter);
@@ -398,7 +412,7 @@ kj::Promise<void> ProxyServer<ThreadMap>::makeThread(MakeThreadContext context)
g_thread_context.thread_name = ThreadName(m_connection.m_loop->m_exe_name) + " (from " + from + ")";
g_thread_context.waiter = std::make_unique<Waiter>();
thread_context.set_value(&g_thread_context);
std::unique_lock<std::mutex> lock(g_thread_context.waiter->m_mutex);
Lock lock(g_thread_context.waiter->m_mutex);
// Wait for shutdown signal from ProxyServer<Thread> destructor (signal
// is just waiter getting set to null.)
g_thread_context.waiter->wait(lock, [] { return !g_thread_context.waiter; });
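
A compact sketch of this shutdown handshake (ToyWaiter and the globals are hypothetical; the real code uses the thread_local g_thread_context.waiter): the owner clears the pointer under the waiter's mutex and notifies, and the worker's wait predicate observes the null pointer as its exit signal:

#include <condition_variable>
#include <memory>
#include <mutex>
#include <thread>

struct ToyWaiter
{
    std::mutex m_mutex;
    std::condition_variable m_cv;
};

std::unique_ptr<ToyWaiter> g_waiter; // thread_local in the real code

int main()
{
    g_waiter = std::make_unique<ToyWaiter>();
    ToyWaiter& waiter = *g_waiter;
    std::thread worker([&] {
        std::unique_lock<std::mutex> lock(waiter.m_mutex);
        // Null pointer is the shutdown signal, mirroring the wait() above.
        waiter.m_cv.wait(lock, [] { return !g_waiter; });
    });
    std::unique_ptr<ToyWaiter> moved;
    {
        const std::unique_lock<std::mutex> lock(waiter.m_mutex);
        moved = std::move(g_waiter); // pointer becomes null under the mutex
        waiter.m_cv.notify_all();
    }
    worker.join();
    // `moved` destroys the waiter here, after the worker stopped using it.
}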