Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable FinalizationRegistry #3560

Draft
wants to merge 4 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions samples/helloworld_esm/config.capnp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ using Workerd = import "/workerd/workerd.capnp";
# instance of the workerd runtime. A single config file can contain multiple
# Workerd.Config definitions and must have at least one.
const helloWorldExample :Workerd.Config = (
v8Flags = [ "--expose-gc" ],

# Every workerd instance consists of a set of named services. A worker, for instance,
# is a type of service. Other types of services can include external servers, the
Expand Down
78 changes: 77 additions & 1 deletion samples/helloworld_esm/worker.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,82 @@

export default {
async fetch(req, env) {
return new Response("Hello World\n");
let resp = await fetch("https://example.com");
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't forget to revert this sample back to its original state when this is done. Most likely it would make sense to have this as a separate example.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks like more of a test. Should it be converted to a wd-test?


// GC TEST
let cleanup0_call_count = 0;
let cleanup1_call_count = 0;

let cleanup0 = function (holdings) {
++cleanup0_call_count;
}

let cleanup1 = function (holdings) {
++cleanup1_call_count;
}

let fg0 = new FinalizationRegistry(cleanup0);
let fg1 = new FinalizationRegistry(cleanup1);

// Register 1 weak reference for each FinalizationRegistry and kill the
// objects they point to.
(function () {
// The objects need to be inside a closure so that we can reliably kill
// them.
let objects = [];
objects[0] = {};
objects[1] = {};
fg0.register(objects[0], "holdings0-0");
fg1.register(objects[1], "holdings1-0");
// Drop the references to the objects.
objects = [];
})();

// Schedule a GC, which will schedule both fg0 and fg1 for cleanup.
// Here and below, we need to invoke GC asynchronously and wait for it to
// finish, so that it doesn't need to scan the stack. Otherwise, the objects
// may not be reclaimed because of conservative stack scanning and the test
// may not work as intended.
let task_1_gc = (async function () {
gc({ type: 'major', execution: 'async' });

// Before the cleanup task has a chance to run, do the same thing again, so
// both FinalizationRegistries are (again) scheduled for cleanup. This has to
// be a IIFE function (so that we can reliably kill the objects) so we cannot
// use the same function as before.
(function () {
let objects = [];
objects[0] = {};
objects[1] = {};
fg0.register(objects[0], "holdings0-1");
fg1.register(objects[1], "holdings1-1");
objects = [];
})();
})();

// Schedule a second GC for execution after that, which will again schedule
// both fg0 and fg1 for cleanup.
let task_2_gc = (async function () {
gc({ type: 'major', execution: 'async' });

// Check that no cleanup task has had the chance to run yet.
console.log("Expected 0:" + cleanup0_call_count);
console.log("Expected 0:" + cleanup1_call_count);
})();

// Wait for the two GCs to be executed.
await task_1_gc;
await task_2_gc;

// Wait two ticks, so that the finalization registry cleanup has an
// opportunity to both run and re-post itself.
await new Promise(resolve=>setTimeout(resolve, 1000));
await new Promise(resolve=>setTimeout(resolve, 1000));

console.log("Expected 2:" + cleanup0_call_count);
console.log("Expected 2:" + cleanup1_call_count);
// GC TEST

return resp;
}
};
1 change: 1 addition & 0 deletions src/workerd/api/global-scope.c++
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,7 @@ kj::Promise<DeferredProxy<void>> ServiceWorkerGlobalScope::request(kj::HttpMetho
return DeferredProxy<void>{promise.attach(kj::mv(adapter), kj::mv(client))};
}
} else KJ_IF_SOME(promise, event->getResponsePromise(lock)) {

auto body2 = kj::addRef(*ownRequestBody);

// HACK: If the client disconnects, the `response` reference is no longer valid. But our
Expand Down
19 changes: 19 additions & 0 deletions src/workerd/io/io-context.c++
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,15 @@

#include "io-context.h"

#include "libplatform/libplatform.h"
#include "v8-platform.h" // NOLINT(build/include_directory)
#include "v8config.h" // NOLINT(build/include_directory)
#include <workerd/io/io-gate.h>
#include <workerd/io/worker.h>
#include <workerd/jsg/jsg.h>
#include <workerd/util/sentry.h>
#include <workerd/util/uncaught-exception-source.h>
#include <workerd/jsg/setup.h>

#include <kj/debug.h>

Expand Down Expand Up @@ -1155,6 +1159,17 @@ void IoContext::runImpl(Runnable& runnable,
}
}
});

if (!isCurrentNull()) {
KJ_LOG(ERROR, "IoContext not-null before running PumpMessageLoop()");
} else {
worker->runInLockScope(lockType, [&](Worker::Lock& lock) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Taking a new lock here, immediately after releasing the previous one, is expensive. You should find a way to run PumpMessageLoop() just before releasing the previous lock instead.

jsg::Lock& js = lock;
auto& system = const_cast<jsg::V8System&>(js.getV8System());
KJ_DBG(js.v8Isolate);
while (v8::platform::PumpMessageLoop(&system.getDefaultPlatform(), js.v8Isolate, v8::platform::MessageLoopBehavior::kDoNotWait)) {}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It feels wrong for this code to be aware of the v8::Platform. Knowledge of how the platform is set up (and even the fact that we use the default platform) should be encapsulated inside JSG.

To that end, perhaps jsg::Lock should have a pumpMessageLoop() method, which internally calls v8::platform::PumpMessageLoop appropriately?

Copy link
Member

@kentonv kentonv Feb 18, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this actually the right time to call PumpMessageLoop -- at the end of every single ctx.run()?

Arguably it might be preferable if we invoked PumpMessageLoop asynchronously later on when we're not actively responding to a request.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree that we likely need to find a better place for this. We can end up running ctx.run quite a few times during a request.

});
}
}

static constexpr auto kAsyncIoErrorMessage =
Expand Down Expand Up @@ -1182,6 +1197,10 @@ bool IoContext::hasCurrent() {
return threadLocalRequest != nullptr;
}

bool IoContext::isCurrentNull() {
return threadLocalRequest == nullptr;
}

bool IoContext::isCurrent() {
return this == threadLocalRequest;
}
Expand Down
3 changes: 3 additions & 0 deletions src/workerd/io/io-context.h
Original file line number Diff line number Diff line change
Expand Up @@ -376,6 +376,9 @@ class IoContext final: public kj::Refcounted, private kj::TaskSet::ErrorHandler
// True if there is a current IoContext for the thread (current() will not throw).
static bool hasCurrent();

// True if there is no IoContext for this thread
static bool isCurrentNull();

// True if this is the IoContext for the current thread (same as `hasCurrent() && tcx == current()`).
bool isCurrent();

Expand Down
4 changes: 4 additions & 0 deletions src/workerd/jsg/jsg.h
Original file line number Diff line number Diff line change
Expand Up @@ -2276,6 +2276,8 @@ class ExternalMemoryAdjustment final {
// Isolate<TypeWrapper>::Lock. Usually this is only done in top-level code, and the Lock is
// passed down to everyone else from there. See setup.h for details.

class V8System;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think we should be exposing V8System like this. The intent of JSG is to abstract V8 specifics away as much as possible and this just exposes them more. If we have to expose something here, which I'm unsure about, then the functionality should be folded into a new jsg::Lock method... like js.pumpMessageLoop() so that the details of interfacing with the v8 APIS do not need to leak more out to other areas.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I agree. js.pumpMessageLoop() was the intended plan before I sent this out for review 😅


class Lock {
public:
// The underlying V8 isolate, useful for directly calling V8 APIs. Hopefully, this is rarely
Expand All @@ -2288,6 +2290,8 @@ class Lock {
return context;
}

virtual const V8System& getV8System() = 0;

// Get the current Lock for the given V8 isolate. Segfaults if the isolate is not locked.
//
// This method is intended to be used in callbacks from V8 that pass an isolate pointer but
Expand Down
2 changes: 1 addition & 1 deletion src/workerd/jsg/resource.h
Original file line number Diff line number Diff line change
Expand Up @@ -1431,7 +1431,7 @@ class ResourceWrapper {
// We do not allow use of WeakRef or FinalizationRegistry because they introduce
// non-deterministic behavior.
check(global->Delete(context, v8StrIntern(isolate, "WeakRef"_kj)));
check(global->Delete(context, v8StrIntern(isolate, "FinalizationRegistry"_kj)));
//check(global->Delete(context, v8StrIntern(isolate, "FinalizationRegistry"_kj)));
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We'd be enabling WeakRef too right?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The current plan is to only enable FinalizationRegistry, given the immediate memory cleanup benefits for wasm users. WeakRef is something we can probably think about as a followup.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there actually anything more needed to enable WeakRef? If it's just a matter of removing the line above, I think we should do it in this change.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes it should work by just removing the line, although @harrishancock was concerned about WeakRef giving immediate notification of GC collection instead of FinalizationRegistry which is more non-deterministic.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

WeakRef does not give immediate notification thankfully. I suppose someone could poll it as quickly as possible to approximate immediate notification but thankfully it does not provide a notification api.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My concern isn't immediate notification, but lack of control over when notifications occur. With FinalizationRegistry, we can control exactly when finalization callbacks are scheduled. With WeakRef, we have no control over when they appear empty -- that's entirely up to the GC. So, if controlling the timing of GC observation is important for risk mitigation, then enabling WeakRef is strictly higher risk than enabling FinalizationRegistry.

That said, I'd love to just accept the risk and enable WeakRef and be done with it.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm good with accepting the risk as I thinks it is likely quite minimal. @kentonv ?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This probably needs a compat flag as I suspect there are people that check for the existence of these APIs and use them if they are available -- making them suddenly available would therefore cause such workers to start taking a new code path which could end up being broken.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah that was something on my mind as well, had a brief discussion with @mikenomitch about placing this behind a compat date/flag.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This makes me sad, but yes, compat flag absolutely needed. I've seen such checks in the wild a few times now.


// Store a pointer to this object in slot 1, to be extracted in callbacks.
context->SetAlignedPointerInEmbedderData(1, ptr.get());
Expand Down
11 changes: 7 additions & 4 deletions src/workerd/jsg/setup.c++
Original file line number Diff line number Diff line change
Expand Up @@ -76,11 +76,14 @@ static kj::Own<v8::Platform> userPlatform(v8::Platform& platform) {
V8System::V8System(): V8System(defaultPlatform(0), nullptr) {}
V8System::V8System(kj::ArrayPtr<const kj::StringPtr> flags): V8System(defaultPlatform(0), flags) {}
V8System::V8System(v8::Platform& platformParam): V8System(platformParam, nullptr) {}
V8System::V8System(v8::Platform& platformParam, kj::ArrayPtr<const kj::StringPtr> flags)
: V8System(userPlatform(platformParam), flags) {}
V8System::V8System(kj::Own<v8::Platform> platformParam, kj::ArrayPtr<const kj::StringPtr> flags)
V8System::V8System(v8::Platform& platformParam, kj::ArrayPtr<const kj::StringPtr> flags, v8::Platform* defaultPlatformPtr)
: V8System(userPlatform(platformParam), flags, defaultPlatformPtr) {}
V8System::V8System(kj::Own<v8::Platform> platformParam, kj::ArrayPtr<const kj::StringPtr> flags, v8::Platform* defaultPlatformPtr)
: platformInner(kj::mv(platformParam)),
platformWrapper(*platformInner) {
platformWrapper(*platformInner), defaultPlatformPtr_{defaultPlatformPtr} {
if (!defaultPlatformPtr_) {
defaultPlatformPtr_ = platformInner.get();
}
#if V8_HAS_STACK_START_MARKER
v8::StackStartMarker::EnableForProcess();
#endif
Expand Down
15 changes: 13 additions & 2 deletions src/workerd/jsg/setup.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,19 +52,22 @@ class V8System {
explicit V8System(v8::Platform& platform);

// Use a possibly-custom v8::Platform implementation, and apply flags.
explicit V8System(v8::Platform& platform, kj::ArrayPtr<const kj::StringPtr> flags);
explicit V8System(v8::Platform& platform, kj::ArrayPtr<const kj::StringPtr> flags, v8::Platform* defuaultPlatformPtr = nullptr);

~V8System() noexcept(false);

typedef void FatalErrorCallback(kj::StringPtr location, kj::StringPtr message);
static void setFatalErrorCallback(FatalErrorCallback* callback);

auto& getDefaultPlatform() { return *defaultPlatformPtr_; }

private:
kj::Own<v8::Platform> platformInner;
V8PlatformWrapper platformWrapper;
friend class IsolateBase;
v8::Platform* defaultPlatformPtr_;

explicit V8System(kj::Own<v8::Platform>, kj::ArrayPtr<const kj::StringPtr>);
explicit V8System(kj::Own<v8::Platform>, kj::ArrayPtr<const kj::StringPtr>, v8::Platform* defaultPlatformPtr = nullptr);
};

// Base class of Isolate<T> containing parts that don't need to be templated, to avoid code
Expand All @@ -73,6 +76,10 @@ class IsolateBase {
public:
static IsolateBase& from(v8::Isolate* isolate);

auto& getV8System() {
return system;
}

// Unwraps a JavaScript exception as a kj::Exception.
virtual kj::Exception unwrapException(
v8::Local<v8::Context> context, v8::Local<v8::Value> exception) = 0;
Expand Down Expand Up @@ -504,6 +511,10 @@ class Isolate: public IsolateBase {
KJ_DISALLOW_COPY_AND_MOVE(Lock);
KJ_DISALLOW_AS_COROUTINE_PARAM;

virtual const V8System& getV8System() override {
return jsgIsolate.getV8System();
}

// Creates a `TypeHandler` for the given type. You can use this to convert between the type
// and V8 handles, as well as to allocate instances of the type on the V8 heap (if it is
// a resource type).
Expand Down
2 changes: 2 additions & 0 deletions src/workerd/server/v8-platform-impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,8 @@ class WorkerdPlatform final: public v8::Platform {
return inner.GetTracingController();
}

auto& getInner() { return inner; }

private:
v8::Platform& inner;
};
Expand Down
3 changes: 2 additions & 1 deletion src/workerd/server/workerd.c++
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include <workerd/server/workerd-meta.capnp.h>
#include <workerd/server/workerd.capnp.h>
#include <workerd/util/autogate.h>
#include <v8-debug.h>

#include <fcntl.h>
#include <openssl/rand.h>
Expand Down Expand Up @@ -1329,7 +1330,7 @@ class CliMain final: public SchemaFileImpl::ErrorReporter {
auto platform = jsg::defaultPlatform(0);
WorkerdPlatform v8Platform(*platform);
jsg::V8System v8System(
v8Platform, KJ_MAP(flag, config.getV8Flags()) -> kj::StringPtr { return flag; });
v8Platform, KJ_MAP(flag, config.getV8Flags()) -> kj::StringPtr { return flag; }, platform.get());
auto promise = func(v8System, config);
KJ_IF_SOME(w, watcher) {
promise = promise.exclusiveJoin(waitForChanges(w).then([this]() {
Expand Down
Loading