Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
118 changes: 118 additions & 0 deletions Lib/test/test_capi/test_opt.py
Original file line number Diff line number Diff line change
Expand Up @@ -1518,6 +1518,29 @@ class Foo:
Foo.attr = 0
self.assertFalse(ex.is_valid())

def test_guard_type_version_locked_removed(self):
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
def test_guard_type_version_locked_removed(self):
def test_guard_type_version_removed(self):

Please refer to R137's and R140's comments on optimizer_bytecodes.c

"""
Verify that redundant _GUARD_TYPE_VERSION_LOCKED guards are
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
Verify that redundant _GUARD_TYPE_VERSION_LOCKED guards are
Verify that redundant _GUARD_TYPE_VERSION guards are

eliminated for sequential STORE_ATTR_INSTANCE_VALUE in __init__.
"""

class Foo:
def __init__(self):
self.a = 1
self.b = 2
self.c = 3

def thing(n):
for _ in range(n):
Foo()

res, ex = self._run_with_optimizer(thing, TIER2_THRESHOLD)
self.assertIsNotNone(ex)
opnames = list(iter_opnames(ex))
guard_locked_count = opnames.count("_GUARD_TYPE_VERSION_LOCKED")
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
guard_locked_count = opnames.count("_GUARD_TYPE_VERSION_LOCKED")
guard_count = opnames.count("_GUARD_TYPE_VERSION")

# Only the first store needs the guard; the rest should be NOPed.
self.assertEqual(guard_locked_count, 1)

def test_type_version_doesnt_segfault(self):
"""
Tests that setting a type version doesn't cause a segfault when later looking at the stack.
Expand All @@ -1539,6 +1562,101 @@ def fn(a):

fn(A())

def test_init_resolves_callable(self):
"""
_CHECK_AND_ALLOCATE_OBJECT should resolve __init__ to a constant,
enabling the optimizer to propagate type information through the frame
and eliminate redundant function version and arg count checks.
"""
# A plain class whose __init__ only stores its two arguments.
class MyPoint:
def __init__(self, x, y):
self.x = x
self.y = y

# Hot loop under test: instantiates MyPoint on every iteration.
# NOTE(review): the uop assertions below inspect what was recorded for
# this loop, so restructuring its body may change the result.
def testfunc(n):
total = 0.0
for _ in range(n):
p = MyPoint(1.0, 2.0)
total += p.x
return total

# Returns the function result and an executor object (checked for None below).
res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD)
# Every iteration adds p.x == 1.0, so the total equals the iteration count.
self.assertAlmostEqual(res, TIER2_THRESHOLD * 1.0)
self.assertIsNotNone(ex)
uops = get_opnames(ex)
# The __init__ call should be traced through via _PUSH_FRAME
self.assertIn("_PUSH_FRAME", uops)
# __init__ resolution eliminates function version and arg checks
self.assertNotIn("_CHECK_FUNCTION_VERSION", uops)
self.assertNotIn("_CHECK_FUNCTION_EXACT_ARGS", uops)

def test_guard_type_version_locked_propagates(self):
"""
_GUARD_TYPE_VERSION_LOCKED should set the type version on the
symbol so repeated accesses to the same type can benefit.
"""
# NOTE(review): the docstring names _GUARD_TYPE_VERSION_LOCKED, but the
# assertion further down counts "_GUARD_TYPE_VERSION" - confirm which uop
# this test is meant to cover.
# Class with two small methods that both read the same attribute.
class Item:
def __init__(self, val):
self.val = val

def get(self):
return self.val

def get2(self):
return self.val + 1

# Hot loop under test: the instance is created once, outside the loop,
# and both methods are called on it in a single expression.
def testfunc(n):
item = Item(42)
total = 0
for _ in range(n):
# Two method calls on the same object — the second
# should benefit from type info set by the first.
total += item.get() + item.get2()
return total

res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD)
# item.val is 42, so each iteration contributes get() + get2() == 42 + 43.
self.assertEqual(res, TIER2_THRESHOLD * (42 + 43))
self.assertIsNotNone(ex)
uops = get_opnames(ex)
# Both methods should be traced through
self.assertEqual(uops.count("_PUSH_FRAME"), 2)
# Type version propagation: one guard covers both method lookups
self.assertEqual(uops.count("_GUARD_TYPE_VERSION"), 1)
# Function checks eliminated (type info resolves the callable)
self.assertNotIn("_CHECK_FUNCTION_VERSION", uops)
self.assertNotIn("_CHECK_FUNCTION_EXACT_ARGS", uops)

def test_method_chain_guard_elimination(self):
"""
Calling two methods on the same object should share the outer
type guard — only one _GUARD_TYPE_VERSION for the two lookups.
"""
# add() mutates the instance and returns self, so a chained call
# c.add(1).add(2) invokes both methods on the same object.
class Calc:
def __init__(self, val):
self.val = val

def add(self, x):
self.val += x
return self

# Hot loop under test: one chained pair of add() calls per iteration.
def testfunc(n):
c = Calc(0)
for _ in range(n):
c.add(1).add(2)
return c.val

res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD)
# c.val starts at 0 and grows by 1 + 2 on every iteration.
self.assertEqual(res, TIER2_THRESHOLD * 3)
self.assertIsNotNone(ex)
uops = get_opnames(ex)
# Both add() calls should be inlined
push_count = uops.count("_PUSH_FRAME")
self.assertEqual(push_count, 2)
# Only one outer type version guard for the two method lookups
# on the same object c (the second lookup reuses type info)
guard_version_count = uops.count("_GUARD_TYPE_VERSION")
self.assertEqual(guard_version_count, 1)

def test_func_guards_removed_or_reduced(self):
def testfunc(n):
for i in range(n):
Expand Down
5 changes: 5 additions & 0 deletions Python/bytecodes.c
Original file line number Diff line number Diff line change
Expand Up @@ -2721,6 +2721,11 @@ dummy_func(
}

op(_GUARD_TYPE_VERSION_LOCKED, (type_version/2, owner -- owner)) {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
op(_GUARD_TYPE_VERSION_LOCKED, (type_version/2, owner -- owner)) {
op(_GUARD_TYPE_VERSION, (type_version/2, owner -- owner)) {

Refer to R137's comments and R140's comments on optimizer_bytecodes.c

// Guard that type version matches expected value. Object is assumed to be
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// Guard that type version matches expected value. Object is assumed to be

// locked on entry. If version matches, lock is retained for subsequent
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// locked on entry. If version matches, lock is retained for subsequent

// operations. If mismatch, unlock and exit (deopt). This allows the JIT
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// operations. If mismatch, unlock and exit (deopt). This allows the JIT

// optimizer to eliminate this guard entirely if type version is proven,
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// optimizer to eliminate this guard entirely if type version is proven,

// in which case the lock is held for the entire trace duration.
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// in which case the lock is held for the entire trace duration.

PyObject *owner_o = PyStackRef_AsPyObjectBorrow(owner);
assert(type_version != 0);
PyTypeObject *tp = Py_TYPE(owner_o);
Expand Down
35 changes: 33 additions & 2 deletions Python/optimizer_bytecodes.c
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,21 @@ dummy_func(void) {
assert(!PyJitRef_IsUnique(value));
}

op(_GUARD_TYPE_VERSION_LOCKED, (type_version/2, owner -- owner)) {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
op(_GUARD_TYPE_VERSION_LOCKED, (type_version/2, owner -- owner)) {
op(_GUARD_TYPE_VERSION, (type_version/2, owner -- owner)) {

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is in reference to the comment on R140

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's not the full solution, but it's a step in that direction.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I still need to add the unlock/deopt part, but I'm not quite sure how to go about it, as I am still learning this code. It would be nice if someone could help me with that. Thanks!

assert(type_version);
if (sym_matches_type_version(owner, type_version)) {
ADD_OP(_NOP, 0, 0);
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should not be removing this, as we are moving towards free-threading (FT) compatibility. This uop also unlocks objects on FT builds, so it has side effects and we need to keep it around.

Instead, you should break out the _GUARD_TYPE_VERSION_LOCKED into _GUARD_TYPE_VERSION + UNLOCK. See for example the _LOCK_OBJECT op.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I do not fully understand. The unlock only happens when the type version doesn't match. If that cannot happen, there is no need to keep the unlock part or is there?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm renaming the opcode references from _GUARD_TYPE_VERSION_LOCKED to _GUARD_TYPE_VERSION but I'm not sure how to add the unlock part. I would like some help with that part.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I do not fully understand. The unlock only happens when the type version doesn't match. If that cannot happen, there is no need to keep the unlock part or is there?

There still is. Some previous op might have done a LOCK_OBJECT before this uop (it probably did), so you still need the matching UNLOCK_OBJECT in FT.

} else {
PyTypeObject *type = _PyType_LookupByVersion(type_version);
if (type) {
if (sym_set_type_version(owner, type_version)) {
PyType_Watch(TYPE_WATCHER_ID, (PyObject *)type);
_Py_BloomFilter_Add(dependencies, type);
}
}
}
}

op(_STORE_ATTR_INSTANCE_VALUE, (offset/1, value, owner -- o)) {
(void)offset;
(void)value;
Expand Down Expand Up @@ -1027,9 +1042,25 @@ dummy_func(void) {
}

op(_CHECK_AND_ALLOCATE_OBJECT, (type_version/2, callable, self_or_null, args[oparg] -- callable, self_or_null, args[oparg])) {
// Optimizer-side handling of _CHECK_AND_ALLOCATE_OBJECT: look up the type
// for type_version and, when its cached __init__ is a Python function,
// record `callable` as that constant; otherwise `callable` is only known
// to be non-NULL. `self_or_null` is always set to a fresh non-NULL symbol.
// NOTE(review): this text comes from a diff rendered without +/- markers;
// the next three statements may include lines the change removes
// (type_version is used below, and callable is reassigned on every
// branch) - confirm against the real file.
(void)type_version;
(void)args;
callable = sym_new_not_null(ctx);
// A NULL result here takes the final else branch below.
PyTypeObject *type = _PyType_LookupByVersion(type_version);
if (type) {
// NOTE(review): the cast assumes `type` is a heap type - confirm that
// only heap types can reach this op.
PyHeapTypeObject *cls = (PyHeapTypeObject *)type;
PyObject *init = FT_ATOMIC_LOAD_PTR_ACQUIRE(cls->_spec_cache.init);
if (init != NULL && PyFunction_Check(init)) {
// Propagate the __init__ function so _CREATE_INIT_FRAME can
// resolve the code object and continue optimizing.
callable = sym_new_const(ctx, init);
// Watch the type and add it to `dependencies`; presumably this lets
// the trace be invalidated if the type changes - TODO confirm.
PyType_Watch(TYPE_WATCHER_ID, (PyObject *)type);
_Py_BloomFilter_Add(dependencies, type);
}
else {
// No cached __init__, or it is not a Python function.
callable = sym_new_not_null(ctx);
}
}
else {
// The version lookup returned no type.
callable = sym_new_not_null(ctx);
}
self_or_null = sym_new_not_null(ctx);
}

Expand Down
34 changes: 32 additions & 2 deletions Python/optimizer_cases.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading