-
-
Notifications
You must be signed in to change notification settings - Fork 34.4k
gh-148276: Optimize object creation and method calls in the JIT by resolving __init__ at trace optimization time #148277
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -1518,6 +1518,29 @@ class Foo: | |||||
| Foo.attr = 0 | ||||||
| self.assertFalse(ex.is_valid()) | ||||||
|
|
||||||
def test_guard_type_version_locked_removed(self):
    """
    Constructing an object whose __init__ performs three consecutive
    attribute stores should leave exactly one
    _GUARD_TYPE_VERSION_LOCKED uop in the resulting executor.
    """
    class Foo:
        def __init__(self):
            self.a = 1
            self.b = 2
            self.c = 3

    def make_objects(n):
        for _ in range(n):
            Foo()

    _, executor = self._run_with_optimizer(make_objects, TIER2_THRESHOLD)
    self.assertIsNotNone(executor)
    locked_guards = sum(
        1
        for opname in iter_opnames(executor)
        if opname == "_GUARD_TYPE_VERSION_LOCKED"
    )
    # The first store keeps its guard; the guards belonging to the
    # following stores are expected to have been optimized away.
    self.assertEqual(locked_guards, 1)
|
|
||||||
| def test_type_version_doesnt_segfault(self): | ||||||
| """ | ||||||
| Tests that setting a type version doesn't cause a segfault when later looking at the stack. | ||||||
|
|
@@ -1539,6 +1562,101 @@ def fn(a): | |||||
|
|
||||||
| fn(A()) | ||||||
|
|
||||||
def test_init_resolves_callable(self):
    """
    Instantiating a class with a Python-level __init__ in a hot loop
    should produce an executor that pushes a frame for __init__
    (_PUSH_FRAME is present) and contains neither
    _CHECK_FUNCTION_VERSION nor _CHECK_FUNCTION_EXACT_ARGS.
    """
    class MyPoint:
        def __init__(self, x, y):
            self.x = x
            self.y = y

    def sum_x(n):
        acc = 0.0
        for _ in range(n):
            point = MyPoint(1.0, 2.0)
            acc += point.x
        return acc

    result, executor = self._run_with_optimizer(sum_x, TIER2_THRESHOLD)
    self.assertAlmostEqual(result, TIER2_THRESHOLD * 1.0)
    self.assertIsNotNone(executor)
    opnames = get_opnames(executor)
    # A pushed frame shows that the __init__ call was traced through.
    self.assertIn("_PUSH_FRAME", opnames)
    # With __init__ resolved, neither function check should remain.
    for leftover_check in (
        "_CHECK_FUNCTION_VERSION",
        "_CHECK_FUNCTION_EXACT_ARGS",
    ):
        self.assertNotIn(leftover_check, opnames)
|
|
||||||
def test_guard_type_version_locked_propagates(self):
    """
    Two different methods called on the same instance inside a hot
    loop should both be traced through (two _PUSH_FRAME uops) while
    needing only a single _GUARD_TYPE_VERSION, and no
    _CHECK_FUNCTION_VERSION / _CHECK_FUNCTION_EXACT_ARGS uops.

    NOTE(review): the test name refers to _GUARD_TYPE_VERSION_LOCKED,
    but the assertions below only count "_GUARD_TYPE_VERSION" --
    confirm this is the intended uop to check.
    """
    class Item:
        def __init__(self, val):
            self.val = val

        def get(self):
            return self.val

        def get2(self):
            return self.val + 1

    def call_both(n):
        item = Item(42)
        acc = 0
        for _ in range(n):
            # Both calls go through the same receiver, so type
            # information learned for the first is available to the
            # second.
            acc += item.get() + item.get2()
        return acc

    result, executor = self._run_with_optimizer(call_both, TIER2_THRESHOLD)
    self.assertEqual(result, TIER2_THRESHOLD * (42 + 43))
    self.assertIsNotNone(executor)
    opnames = get_opnames(executor)
    # Two pushed frames (one per method) and one type version guard
    # shared by both method lookups.
    for opname, expected_count in (
        ("_PUSH_FRAME", 2),
        ("_GUARD_TYPE_VERSION", 1),
    ):
        self.assertEqual(opnames.count(opname), expected_count)
    # No function-level checks should be left in the trace.
    for leftover_check in (
        "_CHECK_FUNCTION_VERSION",
        "_CHECK_FUNCTION_EXACT_ARGS",
    ):
        self.assertNotIn(leftover_check, opnames)
|
|
||||||
def test_method_chain_guard_elimination(self):
    """
    A chained call ``c.add(1).add(2)``, where add() returns self,
    should yield two _PUSH_FRAME uops but only one
    _GUARD_TYPE_VERSION uop in the executor.
    """
    class Calc:
        def __init__(self, val):
            self.val = val

        def add(self, x):
            self.val += x
            return self

    def chain(n):
        calc = Calc(0)
        for _ in range(n):
            calc.add(1).add(2)
        return calc.val

    result, executor = self._run_with_optimizer(chain, TIER2_THRESHOLD)
    self.assertEqual(result, TIER2_THRESHOLD * 3)
    self.assertIsNotNone(executor)
    opnames = get_opnames(executor)
    # One pushed frame per add() call.
    self.assertEqual(opnames.count("_PUSH_FRAME"), 2)
    # A single type version guard is expected to serve both method
    # lookups in the chain.
    self.assertEqual(opnames.count("_GUARD_TYPE_VERSION"), 1)
|
|
||||||
| def test_func_guards_removed_or_reduced(self): | ||||||
| def testfunc(n): | ||||||
| for i in range(n): | ||||||
|
|
||||||
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -2721,6 +2721,11 @@ dummy_func( | |||||
| } | ||||||
|
|
||||||
| op(_GUARD_TYPE_VERSION_LOCKED, (type_version/2, owner -- owner)) { | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
Refer to R137's comments and R140's comments on optimizer_bytecodes.c |
||||||
| // Guard that type version matches expected value. Object is assumed to be | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
| // locked on entry. If version matches, lock is retained for subsequent | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
| // operations. If mismatch, unlock and exit (deopt). This allows the JIT | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
| // optimizer to eliminate this guard entirely if type version is proven, | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
| // in which case the lock is held for the entire trace duration. | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
| PyObject *owner_o = PyStackRef_AsPyObjectBorrow(owner); | ||||||
| assert(type_version != 0); | ||||||
| PyTypeObject *tp = Py_TYPE(owner_o); | ||||||
|
|
||||||
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -134,6 +134,21 @@ dummy_func(void) { | |||||
| assert(!PyJitRef_IsUnique(value)); | ||||||
| } | ||||||
|
|
||||||
// Optimizer rule for _GUARD_TYPE_VERSION_LOCKED: either drop the guard when
// the owner symbol already matches type_version, or record the version on
// the symbol and register a dependency on the type.
| op(_GUARD_TYPE_VERSION_LOCKED, (type_version/2, owner -- owner)) { | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is in reference to the comment on R140
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's not the full solution, but it's going towards that direction.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Still need to add the unlock/deopt stuff, but I'm not sure how quite to go about it, I am still learning about the code here. Would be nice if someone could help me with that. Thanks! |
||||||
| assert(type_version); | ||||||
| if (sym_matches_type_version(owner, type_version)) { | ||||||
// The symbol already carries this version, so the guard is replaced by _NOP.
// NOTE(review): the thread below argues this also drops an unlock that
// free-threaded builds still need -- unresolved in this revision.
| ADD_OP(_NOP, 0, 0); | ||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We should not be removing this as we are moving towards FT compatibility. This uop unlocks objects on FT as well, so we need to keep it around as it's side effecting. Instead, you should break out the
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I do not fully understand. The unlock only happens when the type version doesn't match. If that cannot happen, there is no need to keep the unlock part or is there?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm renaming the opcode references from
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There still is. Some op previously might have LOCK_OBJECT this uop (it probably did). So you still need the matching UNLOCK_OBJECT in FT. |
||||||
| } else { | ||||||
// Version not yet known for this symbol: look the type up by version and,
// if it exists and the symbol accepts the version, watch the type and add
// it to 'dependencies'.
| PyTypeObject *type = _PyType_LookupByVersion(type_version); | ||||||
| if (type) { | ||||||
| if (sym_set_type_version(owner, type_version)) { | ||||||
| PyType_Watch(TYPE_WATCHER_ID, (PyObject *)type); | ||||||
| _Py_BloomFilter_Add(dependencies, type); | ||||||
| } | ||||||
| } | ||||||
| } | ||||||
| } | ||||||
|
|
||||||
| op(_STORE_ATTR_INSTANCE_VALUE, (offset/1, value, owner -- o)) { | ||||||
| (void)offset; | ||||||
| (void)value; | ||||||
|
|
@@ -1027,9 +1042,25 @@ dummy_func(void) { | |||||
| } | ||||||
|
|
||||||
| op(_CHECK_AND_ALLOCATE_OBJECT, (type_version/2, callable, self_or_null, args[oparg] -- callable, self_or_null, args[oparg])) { | ||||||
| (void)type_version; | ||||||
| (void)args; | ||||||
| callable = sym_new_not_null(ctx); | ||||||
| PyTypeObject *type = _PyType_LookupByVersion(type_version); | ||||||
| if (type) { | ||||||
| PyHeapTypeObject *cls = (PyHeapTypeObject *)type; | ||||||
| PyObject *init = FT_ATOMIC_LOAD_PTR_ACQUIRE(cls->_spec_cache.init); | ||||||
| if (init != NULL && PyFunction_Check(init)) { | ||||||
| // Propagate the __init__ function so _CREATE_INIT_FRAME can | ||||||
| // resolve the code object and continue optimizing. | ||||||
| callable = sym_new_const(ctx, init); | ||||||
| PyType_Watch(TYPE_WATCHER_ID, (PyObject *)type); | ||||||
| _Py_BloomFilter_Add(dependencies, type); | ||||||
| } | ||||||
| else { | ||||||
| callable = sym_new_not_null(ctx); | ||||||
| } | ||||||
| } | ||||||
| else { | ||||||
| callable = sym_new_not_null(ctx); | ||||||
| } | ||||||
| self_or_null = sym_new_not_null(ctx); | ||||||
| } | ||||||
|
|
||||||
|
|
||||||
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please refer to R137's and R140's comments on optimizer_bytecodes.c