diff --git a/Include/internal/pycore_interp_structs.h b/Include/internal/pycore_interp_structs.h
index 3fe1fdaa1589b6..9ab3c89d78c785 100644
--- a/Include/internal/pycore_interp_structs.h
+++ b/Include/internal/pycore_interp_structs.h
@@ -943,7 +943,7 @@ struct _is {
     struct types_state types;
     struct callable_cache callable_cache;
     PyObject *common_consts[NUM_COMMON_CONSTANTS];
-    bool jit;
+    uint8_t jit;
     bool compiling;
     struct _PyExecutorObject *executor_list_head;
     struct _PyExecutorObject *executor_deletion_list_head;
diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h
index ced7e0d8af26a9..0141cf6c678f48 100644
--- a/Include/internal/pycore_optimizer.h
+++ b/Include/internal/pycore_optimizer.h
@@ -48,6 +48,8 @@ typedef struct _PyExitData {
 typedef struct _PyExecutorObject {
     PyObject_VAR_HEAD
     const _PyUOpInstruction *trace;
+    // The interpreter this executor belongs to.
+    PyInterpreterState *interp;
     _PyVMData vm_data; /* Used by the VM, but opaque to the optimizer */
     uint32_t exit_count;
     uint32_t code_size;
diff --git a/Include/internal/pycore_pylifecycle.h b/Include/internal/pycore_pylifecycle.h
index 8faf7a4d403f84..d498f8f8e8d62c 100644
--- a/Include/internal/pycore_pylifecycle.h
+++ b/Include/internal/pycore_pylifecycle.h
@@ -129,6 +129,7 @@ PyAPI_FUNC(int) _PyInterpreterConfig_UpdateFromDict(
     PyInterpreterConfig *,
     PyObject *);
 
+extern void _PyInterpreter_SetJitWithEnvVar(const PyConfig *config, PyInterpreterState *interp);
 
 #ifdef __cplusplus
 }
diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py
index 834a3d4b0a4408..c1a55c71d4e65c 100644
--- a/Lib/test/test_capi/test_opt.py
+++ b/Lib/test/test_capi/test_opt.py
@@ -9,9 +9,14 @@
 
 import _opcode
 
-from test.support import (script_helper, requires_specialization,
-                          import_helper, Py_GIL_DISABLED, requires_jit_enabled,
-                          reset_code)
+from test.support import (
+    script_helper,
+    import_helper,
+    Py_GIL_DISABLED,
+    requires_jit_enabled,
+    threading_helper,
+    reset_code,
+)
 
 _testinternalcapi = import_helper.import_module("_testinternalcapi")
@@ -71,8 +76,6 @@ def count_ops(ex, name):
     return len([opname for opname in iter_opnames(ex) if opname == name])
 
 
-@requires_specialization
-@unittest.skipIf(Py_GIL_DISABLED, "optimizer not yet supported in free-threaded builds")
 @requires_jit_enabled
 class TestExecutorInvalidation(unittest.TestCase):
@@ -140,8 +143,6 @@ def f():
         self.assertIsNone(exe)
 
 
-@requires_specialization
-@unittest.skipIf(Py_GIL_DISABLED, "optimizer not yet supported in free-threaded builds")
 @requires_jit_enabled
 @unittest.skipIf(os.getenv("PYTHON_UOPS_OPTIMIZE") == "0", "Needs uop optimizer to run.")
 class TestUops(unittest.TestCase):
@@ -462,8 +463,35 @@ def testfunc(n, m):
         self.assertIn("_FOR_ITER_TIER_TWO", uops)
 
 
-@requires_specialization
-@unittest.skipIf(Py_GIL_DISABLED, "optimizer not yet supported in free-threaded builds")
+@requires_jit_enabled
+@threading_helper.requires_working_threading()
+@unittest.skipIf(not Py_GIL_DISABLED, "Requires a free-threaded build and the JIT")
+class TestJitFreeThreading(unittest.TestCase):
+    def test_reenabled_with_multiple_threads(self):
+        import threading
+
+        def testfunc(x, expected_value):
+            for i in range(x):
+                pass
+
+        ex = get_first_executor(testfunc)
+        self.assertIsNone(ex)
+        # Warm up and JIT the function.
+        testfunc(TIER2_THRESHOLD + 1, True)
+        ex = get_first_executor(testfunc)
+        self.assertIsNotNone(ex)
+        # Spawn a thread (this turns off the JIT).
+        t = threading.Thread(target=lambda: None, args=())
+        t.start()
+        t.join()
+        # The executor is invalidated after spawning a thread.
+        ex = get_first_executor(testfunc)
+        self.assertIsNone(ex)
+        # JIT the function again.
+        testfunc(TIER2_THRESHOLD + 1, True)
+        ex = get_first_executor(testfunc)
+        self.assertIsNotNone(ex)
+
+
 @requires_jit_enabled
 @unittest.skipIf(os.getenv("PYTHON_UOPS_OPTIMIZE") == "0", "Needs uop optimizer to run.")
 class TestUopsOptimization(unittest.TestCase):
@@ -2185,6 +2213,7 @@ def testfunc(n):
         self.assertNotIn("_GUARD_TOS_INT", uops)
         self.assertIn("_POP_TOP_NOP", uops)
 
+    @unittest.skipIf(Py_GIL_DISABLED, "FT build immortalizes constants")
     def test_call_len_known_length_small_int(self):
         # Make sure that len(t) is optimized for a tuple of length 5.
         # See https://github.com/python/cpython/issues/139393.
@@ -2209,6 +2238,7 @@ def testfunc(n):
         self.assertNotIn("_POP_CALL_LOAD_CONST_INLINE_BORROW", uops)
         self.assertNotIn("_POP_TOP_LOAD_CONST_INLINE_BORROW", uops)
 
+    @unittest.skipIf(Py_GIL_DISABLED, "FT build immortalizes constants")
     def test_call_len_known_length(self):
         # Make sure that len(t) is not optimized for a tuple of length 2048.
         # See https://github.com/python/cpython/issues/139393.
@@ -3007,6 +3037,7 @@ def testfunc(n):
         self.assertIn("_POP_TOP_NOP", uops)
 
+    @unittest.skipIf(Py_GIL_DISABLED, "FT might immortalize this.")
     def test_pop_top_specialize_int(self):
         def testfunc(n):
             for _ in range(n):
@@ -3020,6 +3051,7 @@ def testfunc(n):
         self.assertIn("_POP_TOP_INT", uops)
 
+    @unittest.skipIf(Py_GIL_DISABLED, "FT might immortalize this.")
     def test_pop_top_specialize_float(self):
         def testfunc(n):
             for _ in range(n):
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-11-15-16-30-46.gh-issue-141594.PSsC5J.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-11-15-16-30-46.gh-issue-141594.PSsC5J.rst
new file mode 100644
index 00000000000000..5db6330ca540b0
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-11-15-16-30-46.gh-issue-141594.PSsC5J.rst
@@ -0,0 +1,3 @@
+Add free-threading support to the JIT. In free-threaded builds, the JIT is
+only enabled for single-threaded code and is disabled once multiple threads
+are spawned. Patch by Ken Jin.
diff --git a/Objects/codeobject.c b/Objects/codeobject.c
index 3aea2038fd17e7..d9b8cae58e188d 100644
--- a/Objects/codeobject.c
+++ b/Objects/codeobject.c
@@ -2433,6 +2433,7 @@ code_dealloc(PyObject *self)
     }
 #ifdef _Py_TIER2
     _PyJit_Tracer_InvalidateDependency(tstate, self);
+    _Py_Executors_InvalidateDependency(tstate->interp, self, 1);
     if (co->co_executors != NULL) {
         clear_executors(co);
     }
@@ -3363,8 +3364,12 @@ deopt_code_unit(PyCodeObject *code, int i)
         inst.op.code = _PyOpcode_Deopt[opcode];
         assert(inst.op.code < MIN_SPECIALIZED_OPCODE);
     }
-    // JIT should not be enabled with free-threading
-    assert(inst.op.code != ENTER_EXECUTOR);
+    if (inst.op.code == ENTER_EXECUTOR) {
+        _PyExecutorObject *exec = code->co_executors->executors[inst.op.arg];
+        assert(exec != NULL);
+        inst.op.code = exec->vm_data.opcode;
+        inst.op.arg = exec->vm_data.oparg;
+    }
     return inst;
 }
 
diff --git a/Objects/funcobject.c b/Objects/funcobject.c
index b659ac8023373b..d2ba19b442e3a3 100644
--- a/Objects/funcobject.c
+++ b/Objects/funcobject.c
@@ -298,7 +298,7 @@ functions is running.
 
 */
 
-#ifndef Py_GIL_DISABLED
+#if _Py_TIER2
 static inline struct _func_version_cache_item *
 get_cache_item(PyInterpreterState *interp, uint32_t version)
 {
@@ -315,11 +315,13 @@ _PyFunction_SetVersion(PyFunctionObject *func, uint32_t version)
     // This should only be called from MAKE_FUNCTION. No code is specialized
     // based on the version, so we do not need to stop the world to set it.
     func->func_version = version;
-#ifndef Py_GIL_DISABLED
+#if _Py_TIER2
     PyInterpreterState *interp = _PyInterpreterState_GET();
+    FT_MUTEX_LOCK(&interp->func_state.mutex);
     struct _func_version_cache_item *slot = get_cache_item(interp, version);
     slot->func = func;
     slot->code = func->func_code;
+    FT_MUTEX_UNLOCK(&interp->func_state.mutex);
 #endif
 }
 
@@ -330,13 +332,15 @@ func_clear_version(PyInterpreterState *interp, PyFunctionObject *func)
         // Version was never set or has already been cleared.
         return;
     }
-#ifndef Py_GIL_DISABLED
+#if _Py_TIER2
+    FT_MUTEX_LOCK(&interp->func_state.mutex);
     struct _func_version_cache_item *slot =
         get_cache_item(interp, func->func_version);
     if (slot->func == func) {
         slot->func = NULL;
         // Leave slot->code alone, there may be use for it.
     }
+    FT_MUTEX_UNLOCK(&interp->func_state.mutex);
 #endif
     func->func_version = FUNC_VERSION_CLEARED;
 }
@@ -358,8 +362,9 @@ _PyFunction_ClearVersion(PyFunctionObject *func)
 void
 _PyFunction_ClearCodeByVersion(uint32_t version)
 {
-#ifndef Py_GIL_DISABLED
+#if _Py_TIER2
     PyInterpreterState *interp = _PyInterpreterState_GET();
+    FT_MUTEX_LOCK(&interp->func_state.mutex);
     struct _func_version_cache_item *slot = get_cache_item(interp, version);
     if (slot->code) {
         assert(PyCode_Check(slot->code));
@@ -369,15 +374,19 @@ _PyFunction_ClearCodeByVersion(uint32_t version)
             slot->func = NULL;
         }
     }
+    FT_MUTEX_UNLOCK(&interp->func_state.mutex);
 #endif
 }
 
 PyFunctionObject *
 _PyFunction_LookupByVersion(uint32_t version, PyObject **p_code)
 {
-#ifdef Py_GIL_DISABLED
+#ifndef _Py_TIER2
     return NULL;
 #else
+    // This function does not need locking/atomics as it can only be
+    // called from the optimizer, which is currently disabled
+    // when there are multiple threads.
     PyInterpreterState *interp = _PyInterpreterState_GET();
     struct _func_version_cache_item *slot = get_cache_item(interp, version);
     if (slot->code) {
@@ -401,6 +410,10 @@ _PyFunction_LookupByVersion(uint32_t version, PyObject **p_code)
 uint32_t
 _PyFunction_GetVersionForCurrentState(PyFunctionObject *func)
 {
+    // This function does not need locking/atomics as it can only be
+    // called from the specializing interpreter or the optimizer.
+    // The specializing interpreter holds a strong reference to the function.
+    // The optimizer is currently disabled when there are multiple threads.
     return func->func_version;
 }
diff --git a/Objects/listobject.c b/Objects/listobject.c
index 4a98c8e54ab03f..20092e122cafc1 100644
--- a/Objects/listobject.c
+++ b/Objects/listobject.c
@@ -79,7 +79,9 @@ ensure_shared_on_resize(PyListObject *self)
     // We can't use _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED here because
     // the `CALL_LIST_APPEND` bytecode handler may lock the list without
     // a critical section.
-    assert(Py_REFCNT(self) == 1 || PyMutex_IsLocked(&_PyObject_CAST(self)->ob_mutex));
+    assert(Py_REFCNT(self) == 1 ||
+           (_Py_IsOwnedByCurrentThread((PyObject *)self) && !_PyObject_GC_IS_SHARED(self)) ||
+           PyMutex_IsLocked(&_PyObject_CAST(self)->ob_mutex));
 
     // Ensure that the list array is freed using QSBR if we are not the
     // owning thread.
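The FT_MUTEX_LOCK/FT_MUTEX_UNLOCK pairs above serialize access to the per-interpreter function-version cache. A minimal sketch of the shape such wrappers typically take, assuming they follow the FT_ATOMIC_* convention of compiling away in default (GIL) builds; the patch's actual definitions may differ:

```c
// Hedged sketch, not necessarily the patch's definitions: take a PyMutex in
// free-threaded builds, compile to nothing when the GIL serializes access.
#ifdef Py_GIL_DISABLED
#  define FT_MUTEX_LOCK(m)   PyMutex_Lock(m)
#  define FT_MUTEX_UNLOCK(m) PyMutex_Unlock(m)
#else
#  define FT_MUTEX_LOCK(m)   ((void)(m))
#  define FT_MUTEX_UNLOCK(m) ((void)(m))
#endif
```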
diff --git a/Objects/typeobject.c b/Objects/typeobject.c
index 77e5c3e9f9ec95..4e7e65c7e1f64c 100644
--- a/Objects/typeobject.c
+++ b/Objects/typeobject.c
@@ -1149,7 +1149,8 @@ static void
 set_version_unlocked(PyTypeObject *tp, unsigned int version)
 {
     assert(version == 0 || (tp->tp_versions_used != _Py_ATTR_CACHE_UNUSED));
-#ifndef Py_GIL_DISABLED
+#if _Py_TIER2
+    ASSERT_TYPE_LOCK_HELD();
     PyInterpreterState *interp = _PyInterpreterState_GET();
     // lookup the old version and set to null
     if (tp->tp_version_tag != 0) {
@@ -1158,6 +1159,8 @@ set_version_unlocked(PyTypeObject *tp, unsigned int version)
             + (tp->tp_version_tag % TYPE_VERSION_CACHE_SIZE);
         *slot = NULL;
     }
+#endif
+#ifndef Py_GIL_DISABLED
     if (version) {
         tp->tp_versions_used++;
     }
@@ -1167,7 +1170,7 @@ set_version_unlocked(PyTypeObject *tp, unsigned int version)
     }
 #endif
     FT_ATOMIC_STORE_UINT_RELAXED(tp->tp_version_tag, version);
-#ifndef Py_GIL_DISABLED
+#if _Py_TIER2
     if (version != 0) {
         PyTypeObject **slot =
             interp->types.type_version_cache
@@ -1358,9 +1361,12 @@ _PyType_SetVersion(PyTypeObject *tp, unsigned int version)
 PyTypeObject *
 _PyType_LookupByVersion(unsigned int version)
 {
-#ifdef Py_GIL_DISABLED
+#ifndef _Py_TIER2
     return NULL;
 #else
+    // This function does not need locking/atomics as it can only be
+    // called from the optimizer, which is currently disabled
+    // when there are multiple threads.
     PyInterpreterState *interp = _PyInterpreterState_GET();
     PyTypeObject **slot =
         interp->types.type_version_cache
diff --git a/Python/bytecodes.c b/Python/bytecodes.c
index 77dc82aa853ca4..97279445e2ec62 100644
--- a/Python/bytecodes.c
+++ b/Python/bytecodes.c
@@ -2967,9 +2967,9 @@ dummy_func(
         };
 
         specializing tier1 op(_SPECIALIZE_JUMP_BACKWARD, (--)) {
-            #if ENABLE_SPECIALIZATION
+            #if ENABLE_SPECIALIZATION_FT
             if (this_instr->op.code == JUMP_BACKWARD) {
-                uint8_t desired = tstate->interp->jit ? JUMP_BACKWARD_JIT : JUMP_BACKWARD_NO_JIT;
+                uint8_t desired = FT_ATOMIC_LOAD_UINT8(tstate->interp->jit) ? JUMP_BACKWARD_JIT : JUMP_BACKWARD_NO_JIT;
                 FT_ATOMIC_STORE_UINT8_RELAXED(this_instr->op.code, desired);
                 // Need to re-dispatch so the warmup counter isn't off by one:
                 next_instr = this_instr;
@@ -3312,11 +3312,9 @@ dummy_func(
 
         // Only used by Tier 2
         op(_GUARD_NOT_EXHAUSTED_LIST, (iter, null_or_index -- iter, null_or_index)) {
-#ifndef Py_GIL_DISABLED
             PyObject *list_o = PyStackRef_AsPyObjectBorrow(iter);
             assert(Py_TYPE(list_o) == &PyList_Type);
             EXIT_IF((size_t)PyStackRef_UntagInt(null_or_index) >= (size_t)PyList_GET_SIZE(list_o));
-#endif
         }
 
         replaced op(_ITER_NEXT_LIST, (iter, null_or_index -- iter, null_or_index, next)) {
@@ -5326,6 +5324,19 @@ dummy_func(
         }
 
         tier2 op(_CHECK_VALIDITY, (--)) {
+            // For FT:
+            // This doesn't need atomics (for now), as there is only a
+            // single point where a write from another thread is possible:
+            // when a new thread is spawned and it invalidates all current
+            // executors.
+            // The new thread can only be created by an executing uop prior
+            // to the _CHECK_VALIDITY check. New thread creation is
+            // synchronized by locking of the runtime, and the current
+            // thread is naturally paused/waiting for the new thread to be
+            // created. Thus, there is a strict happens-before relation
+            // between that uop's invalidation of validity and this check.
+            // So for now, while the JIT does not run on multiple threads,
+            // it is safe for this to be non-atomic.
            DEOPT_IF(!current_executor->vm_data.valid);
        }
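This patch switches reads and writes of `interp->jit` from plain accesses to `FT_ATOMIC_LOAD_UINT8`/`FT_ATOMIC_STORE_UINT8`, since another thread's creation can now flip the flag. As a sketch of the convention these wrappers follow (the canonical definitions live in `pycore_pyatomic_ft_wrappers.h` and may differ in detail), they compile to real atomics only in free-threaded builds:

```c
// Sketch of the FT_ATOMIC_* wrapper convention: atomic accesses under
// free-threading, plain accesses when the GIL already serializes them.
#ifdef Py_GIL_DISABLED
#  define FT_ATOMIC_LOAD_UINT8(value) _Py_atomic_load_uint8(&value)
#  define FT_ATOMIC_STORE_UINT8(value, new_value) \
       _Py_atomic_store_uint8(&value, new_value)
#else
#  define FT_ATOMIC_LOAD_UINT8(value) value
#  define FT_ATOMIC_STORE_UINT8(value, new_value) value = new_value
#endif
```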
diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h
index 1053c288bc4313..7230f15e66ab30 100644
--- a/Python/executor_cases.c.h
+++ b/Python/executor_cases.c.h
@@ -11129,7 +11129,6 @@
             _PyStackRef iter;
             null_or_index = stack_pointer[-1];
             iter = stack_pointer[-2];
-            #ifndef Py_GIL_DISABLED
             PyObject *list_o = PyStackRef_AsPyObjectBorrow(iter);
             assert(Py_TYPE(list_o) == &PyList_Type);
             if ((size_t)PyStackRef_UntagInt(null_or_index) >= (size_t)PyList_GET_SIZE(list_o)) {
@@ -11137,7 +11136,6 @@
                 SET_CURRENT_CACHED_VALUES(0);
                 JUMP_TO_JUMP_TARGET();
             }
-            #endif
             _tos_cache1 = null_or_index;
             _tos_cache0 = iter;
             SET_CURRENT_CACHED_VALUES(2);
@@ -11155,7 +11153,6 @@
             _PyStackRef _stack_item_0 = _tos_cache0;
             null_or_index = _stack_item_0;
             iter = stack_pointer[-1];
-            #ifndef Py_GIL_DISABLED
             PyObject *list_o = PyStackRef_AsPyObjectBorrow(iter);
             assert(Py_TYPE(list_o) == &PyList_Type);
             if ((size_t)PyStackRef_UntagInt(null_or_index) >= (size_t)PyList_GET_SIZE(list_o)) {
@@ -11164,7 +11161,6 @@
                 SET_CURRENT_CACHED_VALUES(1);
                 JUMP_TO_JUMP_TARGET();
             }
-            #endif
             _tos_cache1 = null_or_index;
             _tos_cache0 = iter;
             SET_CURRENT_CACHED_VALUES(2);
@@ -11183,7 +11179,6 @@
             _PyStackRef _stack_item_1 = _tos_cache1;
             null_or_index = _stack_item_1;
             iter = _stack_item_0;
-            #ifndef Py_GIL_DISABLED
             PyObject *list_o = PyStackRef_AsPyObjectBorrow(iter);
             assert(Py_TYPE(list_o) == &PyList_Type);
             if ((size_t)PyStackRef_UntagInt(null_or_index) >= (size_t)PyList_GET_SIZE(list_o)) {
@@ -11193,7 +11188,6 @@
                 SET_CURRENT_CACHED_VALUES(2);
                 JUMP_TO_JUMP_TARGET();
             }
-            #endif
             _tos_cache1 = null_or_index;
             _tos_cache0 = iter;
             SET_CURRENT_CACHED_VALUES(2);
@@ -11211,7 +11205,6 @@
             _PyStackRef _stack_item_2 = _tos_cache2;
             null_or_index = _stack_item_2;
             iter = _stack_item_1;
-            #ifndef Py_GIL_DISABLED
             PyObject *list_o = PyStackRef_AsPyObjectBorrow(iter);
             assert(Py_TYPE(list_o) == &PyList_Type);
             if ((size_t)PyStackRef_UntagInt(null_or_index) >= (size_t)PyList_GET_SIZE(list_o)) {
@@ -11222,7 +11215,6 @@
                 SET_CURRENT_CACHED_VALUES(3);
                 JUMP_TO_JUMP_TARGET();
             }
-            #endif
             _tos_cache2 = null_or_index;
             _tos_cache1 = iter;
             _tos_cache0 = _stack_item_0;
diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h
index 959b3a37e5b6fa..1628010856f280 100644
--- a/Python/generated_cases.c.h
+++ b/Python/generated_cases.c.h
@@ -7598,9 +7598,9 @@
             /* Skip 1 cache entry */
             // _SPECIALIZE_JUMP_BACKWARD
             {
-                #if ENABLE_SPECIALIZATION
+                #if ENABLE_SPECIALIZATION_FT
                 if (this_instr->op.code == JUMP_BACKWARD) {
-                    uint8_t desired = tstate->interp->jit ? JUMP_BACKWARD_JIT : JUMP_BACKWARD_NO_JIT;
+                    uint8_t desired = FT_ATOMIC_LOAD_UINT8(tstate->interp->jit) ? JUMP_BACKWARD_JIT : JUMP_BACKWARD_NO_JIT;
                     FT_ATOMIC_STORE_UINT8_RELAXED(this_instr->op.code, desired);
                     next_instr = this_instr;
                     DISPATCH_SAME_OPARG();
diff --git a/Python/optimizer.c b/Python/optimizer.c
index 3c561a8a7fd0e8..e8ce0708b189da 100644
--- a/Python/optimizer.c
+++ b/Python/optimizer.c
@@ -140,7 +140,6 @@ _PyOptimizer_Optimize(
     }
     assert(!interp->compiling);
     assert(_tstate->jit_tracer_state->initial_state.stack_depth >= 0);
-#ifndef Py_GIL_DISABLED
     assert(_tstate->jit_tracer_state->initial_state.func != NULL);
     interp->compiling = true;
     // The first executor in a chain and the MAX_CHAIN_DEPTH'th executor *must*
@@ -198,9 +197,6 @@ _PyOptimizer_Optimize(
     }
     interp->compiling = false;
     return 1;
-#else
-    return 0;
-#endif
 }
 
 static _PyExecutorObject *
@@ -473,7 +469,11 @@ static PyMethodDef uop_executor_methods[] = {
 static int
 executor_is_gc(PyObject *o)
 {
+#ifdef Py_GIL_DISABLED
+    return 1;
+#else
     return !_Py_IsImmortal(o);
+#endif
 }
 
 PyTypeObject _PyUOpExecutor_Type = {
@@ -1663,8 +1663,11 @@ unlink_executor(_PyExecutorObject *executor)
         prev->vm_data.links.next = next;
     }
     else {
-        // prev == NULL implies that executor is the list head
-        PyInterpreterState *interp = PyInterpreterState_Get();
+        // prev == NULL implies that executor is the list head.
+        // Note that we should *not* get the current interpreter, as
+        // that may not always correspond to the interpreter this executor
+        // belongs to.
+        PyInterpreterState *interp = executor->interp;
         assert(interp->executor_list_head == executor);
         interp->executor_list_head = next;
     }
@@ -1679,6 +1682,7 @@ _Py_ExecutorInit(_PyExecutorObject *executor, const _PyBloomFilter *dependency_set)
     for (int i = 0; i < _Py_BLOOM_FILTER_WORDS; i++) {
         executor->vm_data.bloom.bits[i] = dependency_set->bits[i];
     }
+    executor->interp = _PyInterpreterState_GET();
     link_executor(executor);
 }
diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c
index e855df4977acf8..340d284ab15d42 100644
--- a/Python/optimizer_analysis.c
+++ b/Python/optimizer_analysis.c
@@ -115,7 +115,7 @@ convert_global_to_const(_PyUOpInstruction *inst, PyObject *obj, bool pop)
     if (res == NULL) {
         return NULL;
     }
-    if (_Py_IsImmortal(res)) {
+    if (_Py_IsImmortal(res) || _PyObject_HasDeferredRefcount(res)) {
         inst->opcode = pop ? _POP_TOP_LOAD_CONST_INLINE_BORROW : _LOAD_CONST_INLINE_BORROW;
     }
     else {
@@ -248,14 +248,14 @@ eliminate_pop_guard(_PyUOpInstruction *this_instr, bool exit)
 static JitOptRef
 lookup_attr(JitOptContext *ctx, _PyBloomFilter *dependencies,
             _PyUOpInstruction *this_instr,
-            PyTypeObject *type, PyObject *name, uint16_t immortal,
+            PyTypeObject *type, PyObject *name, uint16_t deferred_refcount,
             uint16_t mortal)
 {
     // The cached value may be dead, so we need to do the lookup again... :(
     if (type && PyType_Check(type)) {
         PyObject *lookup = _PyType_Lookup(type, name);
         if (lookup) {
-            int opcode = _Py_IsImmortal(lookup) ? immortal : mortal;
+            int opcode = _Py_IsImmortal(lookup) || _PyObject_HasDeferredRefcount(lookup) ? deferred_refcount : mortal;
             REPLACE_OP(this_instr, opcode, 0, (uintptr_t)lookup);
             PyType_Watch(TYPE_WATCHER_ID, (PyObject *)type);
             _Py_BloomFilter_Add(dependencies, type);
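Both `convert_global_to_const()` and `lookup_attr()` now treat objects with deferred reference counting like immortal ones when deciding whether a borrowing uop is safe. Factored out as a hypothetical helper for clarity (the patch keeps the check inline):

```c
// Hypothetical helper mirroring the inline check: a cached value may be
// borrowed without an INCREF if it is immortal, or if it uses deferred
// reference counting (which free-threaded builds apply to objects such as
// top-level functions, code objects, and types).
static int
can_borrow_reference(PyObject *obj)
{
    return _Py_IsImmortal(obj) || _PyObject_HasDeferredRefcount(obj);
}
```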
diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c
index 88dbdb6d139c5f..99784566aced59 100644
--- a/Python/pylifecycle.c
+++ b/Python/pylifecycle.c
@@ -1194,6 +1194,34 @@ run_presite(PyThreadState *tstate)
 }
 #endif
 
+void
+_PyInterpreter_SetJitWithEnvVar(const PyConfig *config, PyInterpreterState *interp)
+{
+    int enabled = 1;
+#if _Py_TIER2 & 2
+    enabled = 0;
+#endif
+    char *env = Py_GETENV("PYTHON_JIT");
+    if (env && *env != '\0') {
+        // PYTHON_JIT=0|1 overrides the default
+        enabled = *env != '0';
+    }
+    if (enabled) {
+#ifdef _Py_JIT
+        // The perf profiler works fine with the tier 2 interpreter, so
+        // only check for a "real JIT".
+        if (config->perf_profiling > 0) {
+            (void)PyErr_WarnEx(
+                PyExc_RuntimeWarning,
+                "JIT deactivated as perf profiling support is active",
+                0);
+        } else
+#endif
+        {
+            FT_ATOMIC_STORE_UINT8(interp->jit, 1);
+        }
+    }
+}
 
 static PyStatus
 init_interp_main(PyThreadState *tstate)
@@ -1345,30 +1373,7 @@ init_interp_main(PyThreadState *tstate)
     // This is also needed when the JIT is enabled
 #ifdef _Py_TIER2
     if (is_main_interp) {
-        int enabled = 1;
-#if _Py_TIER2 & 2
-        enabled = 0;
-#endif
-        char *env = Py_GETENV("PYTHON_JIT");
-        if (env && *env != '\0') {
-            // PYTHON_JIT=0|1 overrides the default
-            enabled = *env != '0';
-        }
-        if (enabled) {
-#ifdef _Py_JIT
-            // perf profiler works fine with tier 2 interpreter, so
-            // only checking for a "real JIT".
-            if (config->perf_profiling > 0) {
-                (void)PyErr_WarnEx(
-                    PyExc_RuntimeWarning,
-                    "JIT deactivated as perf profiling support is active",
-                    0);
-            } else
-#endif
-            {
-                interp->jit = true;
-            }
-        }
+        _PyInterpreter_SetJitWithEnvVar(config, interp);
     }
 #endif
@@ -1723,7 +1728,7 @@ finalize_modules(PyThreadState *tstate)
     PyInterpreterState *interp = tstate->interp;
 
     // Invalidate all executors and turn off JIT:
-    interp->jit = false;
+    FT_ATOMIC_STORE_UINT8(interp->jit, 0);
     interp->compiling = false;
 #ifdef _Py_TIER2
     _Py_Executors_InvalidateAll(interp, 0);
diff --git a/Python/pystate.c b/Python/pystate.c
index b3d375a7feabb0..eb362ca141086e 100644
--- a/Python/pystate.c
+++ b/Python/pystate.c
@@ -567,7 +567,7 @@ init_interpreter(PyInterpreterState *interp,
         interp->monitoring_tool_versions[t] = 0;
     }
     interp->_code_object_generation = 0;
-    interp->jit = false;
+    interp->jit = 0;
     interp->compiling = false;
     interp->executor_list_head = NULL;
     interp->executor_deletion_list_head = NULL;
@@ -1573,6 +1573,12 @@ add_threadstate(PyInterpreterState *interp, PyThreadState *tstate,
 {
     assert(interp->threads.head != tstate);
     if (next != NULL) {
+#if defined(_Py_TIER2) && defined(Py_GIL_DISABLED)
+        // There's more than one thread now. In free-threaded builds,
+        // disable the JIT completely for the time being.
+        FT_ATOMIC_STORE_UINT8(interp->jit, 0);
+        _Py_Executors_InvalidateAll(interp, 1);
+#endif
         assert(next->prev == NULL || next->prev == tstate);
         next->prev = tstate;
     }
@@ -1870,6 +1876,13 @@ tstate_delete_common(PyThreadState *tstate, int release_gil)
 
 #if _Py_TIER2
     _PyJit_TracerFree((_PyThreadStateImpl *)tstate);
+# ifdef Py_GIL_DISABLED
+    // If only one thread is left, re-enable the JIT.
+    PyThreadState *curr = interp->threads.head;
+    if (curr != NULL && curr->prev == NULL && curr->next == NULL) {
+        _PyInterpreter_SetJitWithEnvVar(_PyInterpreterState_GetConfig(interp), interp);
+    }
+# endif
 #endif
 
     HEAD_UNLOCK(runtime);
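The re-enable path keys off the interpreter's thread-state list: `interp->threads.head` is a doubly linked list, so a sole surviving thread state has neither a `prev` nor a `next` link. Spelled out as a hypothetical helper (the patch inlines this check in `tstate_delete_common()`):

```c
// Hypothetical helper: the list holds exactly one node when the head has
// no predecessor and no successor. Call with the runtime's HEAD lock held.
static int
interp_has_single_thread_state(PyInterpreterState *interp)
{
    PyThreadState *head = interp->threads.head;
    return head != NULL && head->prev == NULL && head->next == NULL;
}
```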
diff --git a/Python/sysmodule.c b/Python/sysmodule.c
index 94eb3164ecad58..2da1609b604fc4 100644
--- a/Python/sysmodule.c
+++ b/Python/sysmodule.c
@@ -2365,7 +2365,7 @@ sys_activate_stack_trampoline_impl(PyObject *module, const char *backend)
 {
 #ifdef PY_HAVE_PERF_TRAMPOLINE
 #ifdef _Py_JIT
-    if (_PyInterpreterState_GET()->jit) {
+    if (FT_ATOMIC_LOAD_UINT8(_PyInterpreterState_GET()->jit)) {
         PyErr_SetString(PyExc_ValueError, "Cannot activate the perf trampoline if the JIT is active");
         return NULL;
     }
@@ -4125,7 +4125,7 @@ _jit_is_enabled_impl(PyObject *module)
 /*[clinic end generated code: output=55865f8de993fe42 input=0524151e857f4f3a]*/
 {
     (void)module;
-    return _PyInterpreterState_GET()->jit;
+    return FT_ATOMIC_LOAD_UINT8_RELAXED(_PyInterpreterState_GET()->jit);
 }
 
 /*[clinic input]
diff --git a/Tools/jit/template.c b/Tools/jit/template.c
index 3537c74a820365..90a4668f610275 100644
--- a/Tools/jit/template.c
+++ b/Tools/jit/template.c
@@ -118,6 +118,16 @@ do { \
 #define ASSERT_WITHIN_STACK_BOUNDS(F, L) (void)0
 #endif
 
+// For now, the FT JIT only supports single-threaded code.
+#undef LOCK_OBJECT
+#undef UNLOCK_OBJECT
+#define LOCK_OBJECT(op) (1)
+#define UNLOCK_OBJECT(op) ((void)0)
+
+#ifdef Py_GIL_DISABLED
+#undef Py_GIL_DISABLED
+#endif
+
 __attribute__((preserve_none)) _Py_CODEUNIT *
 _JIT_ENTRY(
     _PyExecutorObject *executor, _PyInterpreterFrame *frame,
     _PyStackRef *stack_pointer, PyThreadState *tstate,
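The template's `LOCK_OBJECT`/`UNLOCK_OBJECT` stubs make JIT-compiled stencils behave like default-build code: the "lock" always succeeds without touching the per-object mutex, which is only sound while the FT JIT never runs concurrently with another thread. For comparison, a sketch of the two variants the interpreter normally chooses between (modeled on `ceval_macros.h`; the exact free-threaded definitions may differ):

```c
// Sketch modeled on the interpreter's definitions: the template's stubs
// above correspond to the GIL-build branch below.
#ifdef Py_GIL_DISABLED
#  define LOCK_OBJECT(op)   PyMutex_LockFast(&(_PyObject_CAST(op))->ob_mutex)
#  define UNLOCK_OBJECT(op) PyMutex_Unlock(&(_PyObject_CAST(op))->ob_mutex)
#else
#  define LOCK_OBJECT(op)   (1)
#  define UNLOCK_OBJECT(op) ((void)0)
#endif
```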