diff --git a/.ci/run-app-tests.sh b/.ci/run-app-tests.sh
index 27db98c1..a8f58002 100755
--- a/.ci/run-app-tests.sh
+++ b/.ci/run-app-tests.sh
@@ -26,7 +26,10 @@ test_app() {
exit_code=$?
# Check phase
- if echo "$output" | grep -qiE "(trap|exception|fault|panic|illegal|segfault)"; then
+ # Filter out expected PMP termination messages before crash detection
+ local filtered_output
+ filtered_output=$(echo "$output" | grep -v "\[PMP\] Task terminated")
+ if echo "$filtered_output" | grep -qiE "(trap|exception|fault|panic|illegal|segfault)"; then
echo "[!] Crash detected"
return 1
elif [ $exit_code -eq 124 ] || [ $exit_code -eq 0 ]; then
diff --git a/.ci/run-functional-tests.sh b/.ci/run-functional-tests.sh
index e66de5af..65d9b521 100755
--- a/.ci/run-functional-tests.sh
+++ b/.ci/run-functional-tests.sh
@@ -12,6 +12,7 @@ declare -A FUNCTIONAL_TESTS
FUNCTIONAL_TESTS["mutex"]="Fairness: PASS,Mutual Exclusion: PASS,Data Consistency: PASS,Overall: PASS"
FUNCTIONAL_TESTS["semaphore"]="Overall: PASS"
FUNCTIONAL_TESTS["umode"]="PASS: sys_tid() returned,PASS: sys_uptime() returned,[EXCEPTION] Illegal instruction"
+FUNCTIONAL_TESTS["pmp"]="[Task A] PASS: Stack integrity verified,[Task B] PASS: Stack integrity and checksum verified,[Task C] PASS: Stack array integrity verified,Status: PASS - All stacks remained isolated,[EXCEPTION] Store/AMO access fault"
#FUNCTIONAL_TESTS["test64"]="Unsigned Multiply: PASS,Unsigned Divide: PASS,Signed Multiply: PASS,Signed Divide: PASS,Left Shifts: PASS,Logical Right Shifts: PASS,Arithmetic Right Shifts: PASS,Overall: PASS"
#FUNCTIONAL_TESTS["suspend"]="Suspend: PASS,Resume: PASS,Self-Suspend: PASS,Overall: PASS"
@@ -76,8 +77,8 @@ test_functional_app() {
IFS=',' read -ra PASS_CRITERIA <<< "$expected_passes"
# Check for crashes first
- # Special case: umode test expects an illegal instruction exception
- if [ "$test" != "umode" ] && echo "$output" | grep -qiE "(trap|exception|fault|panic|illegal|segfault)"; then
+ # Special case: umode and pmp tests expect exceptions
+ if [ "$test" != "umode" ] && [ "$test" != "pmp" ] && echo "$output" | grep -qiE "(trap|exception|fault|panic|illegal|segfault)"; then
echo "[!] Crash detected"
# Mark all criteria as crashed
diff --git a/Documentation/hal-calling-convention.md b/Documentation/hal-calling-convention.md
index 6df5017f..97483c12 100644
--- a/Documentation/hal-calling-convention.md
+++ b/Documentation/hal-calling-convention.md
@@ -109,14 +109,14 @@ void hal_context_restore(jmp_buf env, int32_t val); /* Restore context + process
The ISR in `boot.c` performs a complete context save of all registers:
```
-Stack Frame Layout (144 bytes, 33 words × 4 bytes, offsets from sp):
+Stack Frame Layout (144 bytes, 36 words × 4 bytes, offsets from sp):
0: ra, 4: gp, 8: tp, 12: t0, 16: t1, 20: t2
24: s0, 28: s1, 32: a0, 36: a1, 40: a2, 44: a3
48: a4, 52: a5, 56: a6, 60: a7, 64: s2, 68: s3
72: s4, 76: s5, 80: s6, 84: s7, 88: s8, 92: s9
96: s10, 100:s11, 104:t3, 108: t4, 112: t5, 116: t6
-120: mcause, 124: mepc, 128: mstatus
-132-143: padding (12 bytes for 16-byte alignment)
+120: mcause, 124: mepc, 128: mstatus, 132: sp (for restore)
+136-143: padding (8 bytes for 16-byte alignment)
```
Why full context save in ISR?
@@ -127,12 +127,14 @@ Why full context save in ISR?
### ISR Stack Requirements
-Each task stack must reserve space for the ISR frame:
+Each task requires space for the ISR frame:
```c
-#define ISR_STACK_FRAME_SIZE 144 /* 33 words × 4 bytes, 16-byte aligned */
+#define ISR_STACK_FRAME_SIZE 144 /* 36 words × 4 bytes, 16-byte aligned */
```
-This "red zone" is reserved at the top of every task stack to guarantee ISR safety.
+**M-mode tasks**: This "red zone" is reserved at the top of the task stack to guarantee ISR safety.
+
+**U-mode tasks**: The ISR frame is allocated on the per-task kernel stack (512 bytes), not on the user stack. This provides stack isolation and prevents user tasks from corrupting kernel trap handling state.
## Function Calling in Linmo
@@ -181,7 +183,9 @@ void task_function(void) {
### Stack Layout
-Each task has its own stack with this layout:
+#### Machine Mode Tasks
+
+Each M-mode task has its own stack with this layout:
```
High Address
@@ -197,6 +201,43 @@ High Address
Low Address
```
+#### User Mode Tasks (Per-Task Kernel Stack)
+
+U-mode tasks maintain separate user and kernel stacks for isolation:
+
+**User Stack** (application execution):
+```
+High Address
++------------------+ <- user_stack_base + user_stack_size
+| |
+| User Stack | <- Grows downward
+| (Dynamic) | <- Task executes here in U-mode
+| |
++------------------+ <- user_stack_base
+Low Address
+```
+
+**Kernel Stack** (trap handling):
+```
+High Address
++------------------+ <- kernel_stack_base + kernel_stack_size (512 bytes)
+| ISR Frame | <- 144 bytes for trap context
+| (144 bytes) | <- Traps switch to this stack
++------------------+
+| Trap Handler | <- Kernel code execution during traps
+| Stack Space |
++------------------+ <- kernel_stack_base
+Low Address
+```
+
+When a U-mode task enters a trap (syscall, interrupt, exception):
+1. The ISR swaps SP with `mscratch` via `csrrw` (`mscratch` holds the kernel stack top)
+2. ISR saves full context to kernel stack
+3. Trap handler executes on kernel stack
+4. Return path restores user SP and switches back
+
+This dual-stack design prevents user tasks from corrupting kernel state and provides strong isolation between privilege levels.
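+
+The privilege check at trap entry relies on a simple `mscratch` convention, condensed below from the `_isr` routine in `arch/riscv/boot.c` (label names shortened for clarity):
+
+```
+csrrw sp, mscratch, sp   # swap SP and mscratch
+bnez  sp, umode_entry    # non-zero result: mscratch held a kernel stack, trap came from U-mode
+csrrw sp, mscratch, sp   # zero result: M-mode trap, undo the swap and keep the current stack
+```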
+
### Stack Alignment
- 16-byte alignment: Required by RISC-V ABI for stack pointer
- 4-byte alignment: Minimum for all memory accesses on RV32I
diff --git a/Documentation/hal-riscv-context-switch.md b/Documentation/hal-riscv-context-switch.md
index f66a41f4..d274bae8 100644
--- a/Documentation/hal-riscv-context-switch.md
+++ b/Documentation/hal-riscv-context-switch.md
@@ -123,14 +123,26 @@ a complete interrupt service routine frame:
```c
void *hal_build_initial_frame(void *stack_top,
void (*task_entry)(void),
- int user_mode)
+ int user_mode,
+ void *kernel_stack,
+ size_t kernel_stack_size)
{
- /* Place frame in stack with initial reserve below for proper startup */
- uint32_t *frame = (uint32_t *) ((uint8_t *) stack_top - 256 -
- ISR_STACK_FRAME_SIZE);
+ /* For U-mode tasks, build frame on kernel stack for stack isolation.
+ * For M-mode tasks, build frame on user stack as before.
+ */
+ uint32_t *frame;
+ if (user_mode && kernel_stack) {
+ /* U-mode: Place frame on per-task kernel stack */
+ void *kstack_top = (uint8_t *) kernel_stack + kernel_stack_size;
+ frame = (uint32_t *) ((uint8_t *) kstack_top - ISR_STACK_FRAME_SIZE);
+ } else {
+ /* M-mode: Place frame on user stack with reserve below */
+ frame = (uint32_t *) ((uint8_t *) stack_top - 256 -
+ ISR_STACK_FRAME_SIZE);
+ }
/* Initialize all general purpose registers to zero */
- for (int i = 0; i < 32; i++)
+ for (int i = 0; i < 36; i++)
frame[i] = 0;
/* Compute thread pointer: aligned to 64 bytes from _end */
@@ -152,6 +164,18 @@ void *hal_build_initial_frame(void *stack_top,
/* Set entry point */
frame[FRAME_EPC] = (uint32_t) task_entry;
+ /* SP value for when ISR returns (stored in frame[33]).
+ * For U-mode: Set to user stack top.
+ * For M-mode: Set to frame + ISR_STACK_FRAME_SIZE.
+ */
+ if (user_mode && kernel_stack) {
+ /* U-mode: frame[33] should contain user SP */
+ frame[FRAME_SP] = (uint32_t) ((uint8_t *) stack_top - 256);
+ } else {
+ /* M-mode: frame[33] contains kernel SP after frame deallocation */
+ frame[FRAME_SP] = (uint32_t) ((uint8_t *) frame + ISR_STACK_FRAME_SIZE);
+ }
+
return frame; /* Return frame base as initial stack pointer */
}
```
diff --git a/Documentation/pmp-memory-protection.md b/Documentation/pmp-memory-protection.md
new file mode 100644
index 00000000..03ba6ae7
--- /dev/null
+++ b/Documentation/pmp-memory-protection.md
@@ -0,0 +1,293 @@
+# PMP: Memory Protection
+
+## Overview
+
+Linmo operates entirely in Machine mode by default, with all tasks sharing the same physical address space.
+A misbehaving task can corrupt kernel data structures or interfere with other tasks, compromising system stability.
+
+Physical Memory Protection provides hardware-enforced access control at the physical address level.
+Unlike an MMU, PMP requires no page tables or TLB management, making it suitable for resource-constrained RISC-V systems.
+PMP enforces read, write, and execute permissions for up to 16 configurable memory regions.
+
+The design draws inspiration from the F9 microkernel, adopting a three-layer abstraction:
+- **Memory Pools** define static physical regions at boot time, derived from linker symbols.
+- **Flexpages** represent dynamically protected memory ranges with associated permissions.
+- **Memory Spaces** group flexpages into per-task protection domains.
+
+## Architecture
+
+### Memory Abstraction Layers
+
+```mermaid
+graph TD
+ classDef hw fill:#424242,stroke:#000,color:#fff,stroke-width:2px
+ classDef static fill:#e1f5fe,stroke:#01579b,stroke-width:2px
+ classDef dynamic fill:#fff3e0,stroke:#e65100,stroke-width:2px
+ classDef container fill:#e8f5e9,stroke:#1b5e20,stroke-width:2px
+ classDef task fill:#f3e5f5,stroke:#6a1b9a,stroke-width:2px
+
+ subgraph L0 ["Hardware"]
+ PMP[PMP Registers]:::hw
+ end
+
+ subgraph L1 ["Memory Pools"]
+ MP["Static Regions
(.text, .data, .bss)"]:::static
+ end
+
+ subgraph L2 ["Flexpages"]
+ FP["fpage_t
base / size / rwx"]:::dynamic
+ end
+
+ subgraph L3 ["Memory Spaces"]
+ AS["memspace_t
per-task domain"]:::container
+ end
+
+ subgraph L4 ["Task"]
+ TCB[TCB]:::task
+ end
+
+ TCB -->|owns| AS
+ AS -->|contains| FP
+ MP -->|initializes| FP
+ AS -->|configures| PMP
+```
+
+The core structures:
+
+```c
+typedef struct fpage {
+ struct fpage *as_next; /* Next in address space list */
+ struct fpage *map_next; /* Next in mapping chain */
+ struct fpage *pmp_next; /* Next in PMP queue */
+ uint32_t base; /* Physical base address */
+ uint32_t size; /* Region size */
+ uint32_t rwx; /* R/W/X permission bits */
+ uint32_t pmp_id; /* PMP region index */
+ uint32_t flags; /* Status flags */
+ uint32_t priority; /* Eviction priority */
+ int used; /* Usage counter */
+} fpage_t;
+```
+```c
+typedef struct memspace {
+ uint32_t as_id; /* Memory space identifier */
+ struct fpage *first; /* Head of flexpage list */
+ struct fpage *pmp_first; /* Head of PMP-loaded list */
+ struct fpage *pmp_stack; /* Stack regions */
+ uint32_t shared; /* Shared flag */
+} memspace_t;
+```
+
+### TOR Mode and Paired Entries
+
+TOR (Top Of Range) mode defines region *i* as `[pmpaddr[i-1], pmpaddr[i])`.
+This works well for contiguous kernel regions where boundaries naturally chain together.
+
+For dynamically allocated user regions at arbitrary addresses, Linmo uses paired entries:
+
+```
+┌─────────────────────────────────────────┐
+│ Entry N: base_addr (disabled) │
+│ Entry N+1: top_addr (TOR, R|W) │
+│ │
+│ Region N+1 = [base_addr, top_addr) │
+└─────────────────────────────────────────┘
+```
+
+The first entry only supplies the lower bound; its address-matching mode is OFF, so it grants no access by itself.
+The second entry defines the upper bound with TOR mode and the desired permissions.
+This consumes two hardware slots per user region but allows non-contiguous regions at arbitrary addresses.
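+
+As a concrete illustration, a paired entry could be programmed as below. This is a minimal sketch, not the actual PMP driver: the entry indices (4 and 5) are chosen arbitrarily, and only `read_csr`/`write_csr` (already used in `hal.c`) plus the `PMPCFG_*` macros from `csr.h` are assumed. `pmpaddr` registers hold the physical address shifted right by two bits.
+
+```c
+/* Sketch: program a user region as PMP entries 4 (lower bound) and 5 (upper bound) */
+static void pmp_set_user_region(uint32_t base, uint32_t top)
+{
+    asm volatile("csrw pmpaddr4, %0" : : "r"(base >> 2));
+    asm volatile("csrw pmpaddr5, %0" : : "r"(top >> 2));
+
+    /* Entries 4 and 5 live in bytes 0 and 1 of pmpcfg1 on RV32 */
+    uint32_t cfg = read_csr(pmpcfg1);
+    cfg &= ~0xFFFFU;                                        /* clear bytes 0-1 */
+    cfg |= (uint32_t) PMPCFG_A_OFF;                         /* entry 4: OFF, bound only */
+    cfg |= (uint32_t) (PMPCFG_A_TOR | PMPCFG_PERM_RW) << 8; /* entry 5: TOR, R|W */
+    write_csr(pmpcfg1, cfg);
+}
+```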
+
+### Kernel and User Regions
+
+Kernel regions protect `.text`, `.data`, and `.bss` sections:
+
+```c
+static const mempool_t kernel_mempools[] = {
+ DECLARE_MEMPOOL("kernel_text",
+ &_stext, &_etext,
+ PMPCFG_PERM_RX,
+ PMP_PRIORITY_KERNEL),
+ DECLARE_MEMPOOL("kernel_data",
+ &_sdata, &_edata,
+ PMPCFG_PERM_RW,
+ PMP_PRIORITY_KERNEL),
+ DECLARE_MEMPOOL("kernel_bss",
+ &_sbss, &_ebss,
+ PMPCFG_PERM_RW,
+ PMP_PRIORITY_KERNEL),
+};
+```
+
+The kernel heap and stack are intentionally excluded: PMP (without the lock bit) does not restrict M-mode accesses, and the kernel heap and stack are only ever touched in M-mode.
+This keeps regions 0-2 for the kernel and leaves regions 3 and above available for user dynamic regions while preserving correct TOR address ordering.
+
+Kernel regions use a hybrid lock strategy:
+
+| Lock Type | Location | Effect |
+|-----------|---------------------------|-------------------------|
+| Software | `regions[i].locked = 1` | Allocator skips slot |
+| Hardware | `PMPCFG_L` NOT set | M-mode access preserved |
+
+Setting the hardware lock bit would apply the region's R/W/X restrictions to M-mode as well and make the entry immutable until reset.
+
+User regions protect task stacks and are dynamically loaded during context switches.
+When PMP slots are exhausted, user regions can be evicted and reloaded on demand.
+
+## Memory Isolation
+
+### Context Switching
+
+Context switching reconfigures PMP in two phases:
+
+```mermaid
+flowchart LR
+ subgraph Eviction
+ E1[Iterate pmp_first] --> E2[Disable region in hardware]
+ E2 --> E3["Set pmp_id = INVALID"]
+ end
+ subgraph Loading
+ L1[Reset pmp_first = NULL] --> L2{Already loaded?}
+ L2 -->|Yes| L3[Add to tracking list]
+ L2 -->|No| L4[Find free slot]
+ L4 --> L5[Load to hardware]
+ L5 --> L3
+ end
+ Eviction --> Loading
+```
+
+**Eviction phase** iterates the outgoing task's `pmp_first` linked list.
+Each flexpage is disabled in hardware, and `pmp_id` is set to `PMP_INVALID_REGION (0xFF)` to mark it as unloaded.
+
+**Loading phase** rebuilds `pmp_first` from scratch.
+This prevents circular references—if `pmp_first` is not cleared, reloading a flexpage could create a self-loop in the linked list.
+For each flexpage in the incoming task's memory space:
+- **Already loaded** (shared regions): Add directly to tracking list
+- **Not loaded**: Find a free slot via `find_free_region_slot()` and load
+
+If all slots are occupied, remaining regions load on-demand through the fault handler (lazy loading).
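+
+The two phases can be sketched as follows. This is an outline of the behavior described above, not the driver source: `pmp_switch_memspace`, `pmp_disable_region`, and `pmp_load_region` are illustrative names, while `find_free_region_slot()` and `PMP_INVALID_REGION` are the identifiers mentioned earlier.
+
+```c
+/* Sketch: reconfigure PMP when switching from prev's memspace to next's */
+void pmp_switch_memspace(memspace_t *prev, memspace_t *next)
+{
+    /* Phase 1: evict the outgoing task's loaded regions */
+    for (fpage_t *fp = prev->pmp_first; fp; fp = fp->pmp_next) {
+        pmp_disable_region(fp->pmp_id);  /* turn off the hardware entry */
+        fp->pmp_id = PMP_INVALID_REGION; /* mark as unloaded */
+    }
+
+    /* Phase 2: rebuild pmp_first from scratch to avoid self-loops */
+    next->pmp_first = NULL;
+    for (fpage_t *fp = next->first; fp; fp = fp->as_next) {
+        if (fp->pmp_id == PMP_INVALID_REGION) {
+            int slot = find_free_region_slot();
+            if (slot < 0)
+                continue; /* exhausted: left for lazy loading on fault */
+            pmp_load_region(slot, fp);
+        }
+        fp->pmp_next = next->pmp_first; /* add to the loaded-region list */
+        next->pmp_first = fp;
+    }
+}
+```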
+
+### Per-Task Kernel Stack
+
+U-mode trap handling requires a kernel stack to save context.
+If multiple U-mode tasks share a single kernel stack, Task A's context frame is overwritten when Task B traps—the ISR writes to the same position on the shared stack.
+
+Linmo allocates a dedicated 512-byte kernel stack for each U-mode task:
+
+```c
+typedef struct tcb {
+ /* ... */
+ void *kernel_stack; /* Base address of kernel stack (NULL for M-mode) */
+ size_t kernel_stack_size; /* Size of kernel stack in bytes (0 for M-mode) */
+} tcb_t;
+```
+
+M-mode tasks do not require a separate kernel stack—they use the task stack directly without privilege transition.
+
+During context switch, the scheduler saves the incoming task's kernel stack top to a global variable.
+The ISR restore path loads this value into `mscratch`, enabling the next U-mode trap to use the correct per-task kernel stack.
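+
+The handoff itself is a single call from the dispatcher to the helper added in `arch/riscv/hal.c`; the `next` TCB pointer here is illustrative:
+
+```c
+/* On context switch, publish the incoming task's kernel stack top */
+hal_set_kernel_stack(next->kernel_stack, next->kernel_stack_size);
+/* M-mode tasks pass NULL/0, so the ISR falls back to the global _stack */
+```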
+
+### Fault Handling and Task Termination
+
+PMP access faults occur when a U-mode task attempts to access memory outside its loaded regions.
+The trap handler routes these faults to the PMP fault handler, which attempts recovery or terminates the task.
+
+The fault handler first searches the task's memory space for a flexpage containing the faulting address.
+If found and the flexpage is not currently loaded in hardware, it loads the region and returns to the faulting instruction.
+This enables lazy loading—regions not loaded during context switch are loaded on first access.
+
+If no matching flexpage exists, the access is unauthorized (e.g., kernel memory or another task's stack).
+If the flexpage is already loaded but still faulted, recovery is impossible.
+In either case, the handler marks the task as `TASK_ZOMBIE` and returns a termination code.
+
+```mermaid
+flowchart TD
+ A[Find flexpage for fault_addr] --> B{Flexpage found?}
+ B -->|No| F[Unauthorized access]
+ B -->|Yes| C{Already loaded in hardware?}
+ C -->|No| D[Load to hardware]
+ D --> E[Return RECOVERED]
+ C -->|Yes| F
+ F --> G[Mark TASK_ZOMBIE]
+ G --> H[Return TERMINATE]
+```
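+
+A condensed sketch of this flow is shown below; the return codes are summarized in the table that follows. `current_task_memspace`, `find_fpage`, and `mark_task_zombie` are illustrative names, and the second argument mirrors the store-fault flag (`code == 7`) passed from the trap handler.
+
+```c
+/* Sketch of the PMP access-fault handler described above */
+int32_t pmp_handle_access_fault(uint32_t fault_addr, int is_store)
+{
+    (void) is_store; /* used only for diagnostics in the real handler */
+
+    memspace_t *ms = current_task_memspace(); /* illustrative accessor */
+    if (!ms)
+        return PMP_FAULT_UNHANDLED; /* M-mode task: not a PMP-managed fault */
+
+    fpage_t *fp = find_fpage(ms, fault_addr); /* containing flexpage, if any */
+    if (fp && fp->pmp_id == PMP_INVALID_REGION) {
+        int slot = find_free_region_slot();
+        if (slot >= 0) {
+            pmp_load_region(slot, fp); /* lazy load, then retry the access */
+            return PMP_FAULT_RECOVERED;
+        }
+    }
+
+    /* No matching flexpage, or already loaded yet still faulting */
+    mark_task_zombie(); /* cleanup is deferred to the dispatcher */
+    return PMP_FAULT_TERMINATE;
+}
+```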
+
+The trap handler interprets the return value:
+
+| Return Code | Action |
+|-------------------------|-----------------------------------------------|
+| `PMP_FAULT_RECOVERED` | Resume execution at faulting instruction |
+| `PMP_FAULT_TERMINATE` | Print diagnostic, invoke dispatcher |
+| `PMP_FAULT_UNHANDLED` | Fall through to default exception handler |
+
+Terminated tasks are not immediately destroyed.
+The dispatcher calls a cleanup routine before selecting the next runnable task.
+This routine iterates zombie tasks, evicts their PMP regions, frees their memory spaces and stacks, and removes them from the task list.
+Deferring cleanup to the dispatcher avoids modifying task structures from within interrupt context.
+
+## Best Practices
+
+### Hardware Limitations
+
+PMP provides 16 hardware slots shared between kernel and user regions.
+Kernel regions occupy slots 0-2 and cannot be evicted.
+Each user region requires two slots (paired entries for TOR mode).
+
+| Resource | Limit |
+|-----------------------------|----------------------------|
+| Total PMP slots | 16 |
+| Kernel slots | 3 (fixed at boot) |
+| Slots per user region | 2 (paired entries) |
+| Max concurrent user regions | ~6 |
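+
+For example, with the three kernel regions resident, 16 − 3 = 13 slots remain, and paired TOR entries allow at most 13 / 2 = 6 user regions (rounding down) to be loaded at once.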
+
+Systems with many U-mode tasks should minimize per-task region requirements.
+Tasks exceeding available slots rely on lazy loading, which incurs fault handler overhead on first access.
+
+### Task Creation Guidelines
+
+U-mode tasks receive automatic PMP protection.
+The kernel allocates a memory space and registers the task stack as a protected flexpage:
+
+```c
+/* M-mode task: no isolation, full memory access */
+mo_task_spawn(task_func, stack_size);
+
+/* U-mode task: PMP protected, memory space auto-created */
+mo_task_spawn_user(task_func, stack_size);
+```
+
+Choose the appropriate privilege level:
+- **M-mode**: Trusted kernel tasks, drivers requiring full memory access
+- **U-mode**: Application tasks, untrusted or potentially buggy code
+
+### Common Pitfalls
+
+1. Assuming PMP protects the kernel
+
+ PMP only restricts Supervisor and User modes.
+ Machine mode has unrestricted access unless a region's lock bit (`PMPCFG_L`) is set, which Linmo deliberately avoids.
+ This is intentional—the kernel must access all memory to manage protection.
+
+ ```c
+ /* This code in M-mode bypasses PMP entirely */
+ void kernel_func(void) {
+ volatile uint32_t *user_stack = (uint32_t *)0x80007000;
+ *user_stack = 0; /* No fault—M-mode ignores PMP */
+ }
+ ```
+
+ PMP protects user tasks from each other but does not protect the kernel from itself.
+
+2. Exhausting PMP slots
+
+ With only ~6 user regions available, spawning many U-mode tasks causes PMP slot exhaustion.
+ Subsequent tasks rely entirely on lazy loading, degrading performance.
+
+3. Mixing M-mode and U-mode incorrectly
+
+ M-mode tasks spawned with `mo_task_spawn()` do not receive memory spaces.
+ PMP-related functions check for NULL memory spaces and return early, so calling them on M-mode tasks has no effect.
+
+## References
+
+- [Memory Protection for Embedded RISC-V Systems](https://nva.sikt.no/registration/0198eb345173-b2a7ef5c-8e7e-4b98-bd3e-ff9c469ce36d)
\ No newline at end of file
diff --git a/Makefile b/Makefile
index 68175601..fbc7b2d9 100644
--- a/Makefile
+++ b/Makefile
@@ -17,7 +17,7 @@ include arch/$(ARCH)/build.mk
INC_DIRS += -I $(SRC_DIR)/include \
-I $(SRC_DIR)/include/lib
-KERNEL_OBJS := timer.o mqueue.o pipe.o semaphore.o mutex.o logger.o error.o syscall.o task.o main.o
+KERNEL_OBJS := timer.o mqueue.o pipe.o semaphore.o mutex.o logger.o error.o syscall.o task.o memprot.o main.o
KERNEL_OBJS := $(addprefix $(BUILD_KERNEL_DIR)/,$(KERNEL_OBJS))
deps += $(KERNEL_OBJS:%.o=%.o.d)
@@ -29,7 +29,7 @@ deps += $(LIB_OBJS:%.o=%.o.d)
APPS := coop echo hello mqueues semaphore mutex cond \
pipes pipes_small pipes_struct prodcons progress \
rtsched suspend test64 timer timer_kill \
- cpubench test_libc umode
+ cpubench test_libc umode privilege_switch pmp
# Output files for __link target
IMAGE_BASE := $(BUILD_DIR)/image
diff --git a/app/pmp.c b/app/pmp.c
new file mode 100644
index 00000000..91b4a9a3
--- /dev/null
+++ b/app/pmp.c
@@ -0,0 +1,348 @@
+/* PMP Memory Isolation Test
+ *
+ * Validates PMP-based memory protection during task context switches.
+ *
+ * Test Suite:
+ * Test 1: Context Switch & Stack Integrity
+ * - Validates PMP correctly isolates task stacks during context
+ * switches
+ * - Runs to completion, reports PASS/FAIL
+ *
+ * Test 2a: Kernel Protection (Destructive)
+ * - Validates U-mode cannot write to kernel memory
+ * - Triggers PMP fault and task termination
+ *
+ * Test 2b: Inter-Task Isolation (Destructive)
+ * - Validates U-mode cannot access another task's stack
+ * - Triggers PMP fault and task termination
+ */
+
+#include
+
+/* Test configuration */
+#define MAX_ITERATIONS 10
+#define STACK_MAGIC_A 0xAAAAAAAA
+#define STACK_MAGIC_B 0xBBBBBBBB
+#define STACK_MAGIC_C 0xCCCCCCCC
+
+/* Test state tracking */
+static volatile int tests_passed = 0;
+static volatile int tests_failed = 0;
+static volatile int tasks_completed = 0;
+
+/* Cross-task attack: Task B exports its stack address for attacker task */
+static volatile uint32_t *task_b_stack_addr = NULL;
+
+/* External kernel symbols */
+extern uint32_t _stext, _etext;
+extern uint32_t _sdata, _edata;
+
+/* ========================================================================
+ * Test 1: Context Switch & Stack Integrity Check
+ * ======================================================================== */
+
+/* Task A: Stack integrity validation with magic value 0xAAAAAAAA */
+void task_a_integrity(void)
+{
+ int my_tid = sys_tid();
+ umode_printf("[Task A] TID=%d starting (U-mode)\n", my_tid);
+
+ /* Allocate critical data on stack */
+ volatile uint32_t stack_guard = STACK_MAGIC_A;
+ volatile uint32_t iteration_count = 0;
+
+ umode_printf("[Task A] Stack guard at %p = 0x%08x\n", (void *) &stack_guard,
+ (unsigned int) stack_guard);
+
+ for (int i = 0; i < MAX_ITERATIONS; i++) {
+ iteration_count = i + 1;
+
+ sys_tyield();
+
+ /* Verify stack integrity */
+ if (stack_guard != STACK_MAGIC_A) {
+ umode_printf(
+ "[Task A] FAIL: Stack corrupted! "
+ "Expected 0x%08x, got 0x%08x at iteration %d\n",
+ (unsigned int) STACK_MAGIC_A, (unsigned int) stack_guard,
+ (int) iteration_count);
+ tests_failed++;
+ tasks_completed++;
+ while (1)
+ sys_tyield();
+ }
+
+ /* Verify iteration counter */
+ if (iteration_count != (uint32_t) (i + 1)) {
+ umode_printf("[Task A] FAIL: Iteration counter corrupted!\n");
+ tests_failed++;
+ tasks_completed++;
+ while (1)
+ sys_tyield();
+ }
+ }
+
+ umode_printf("[Task A] PASS: Stack integrity verified across %d switches\n",
+ MAX_ITERATIONS);
+ tests_passed++;
+ tasks_completed++;
+
+ /* Keep task alive */
+ while (1) {
+ for (int i = 0; i < 20; i++)
+ sys_tyield();
+ }
+}
+
+/* Task B: Stack integrity validation with magic value 0xBBBBBBBB */
+void task_b_integrity(void)
+{
+ int my_tid = sys_tid();
+ umode_printf("[Task B] TID=%d starting (U-mode)\n", my_tid);
+
+ volatile uint32_t stack_guard = STACK_MAGIC_B;
+ volatile uint32_t checksum = 0;
+
+ /* Export stack address for cross-task attack test */
+ task_b_stack_addr = &stack_guard;
+
+ umode_printf("[Task B] Stack guard at %p = 0x%08x\n", (void *) &stack_guard,
+ (unsigned int) stack_guard);
+
+ for (int i = 0; i < MAX_ITERATIONS; i++) {
+ checksum += (i + 1);
+
+ sys_tyield();
+
+ if (stack_guard != STACK_MAGIC_B) {
+ umode_printf(
+ "[Task B] FAIL: Stack guard corrupted! "
+ "Expected 0x%08x, got 0x%08x\n",
+ (unsigned int) STACK_MAGIC_B, (unsigned int) stack_guard);
+ tests_failed++;
+ tasks_completed++;
+ while (1)
+ sys_tyield();
+ }
+
+ uint32_t expected_checksum = ((i + 1) * (i + 2)) / 2;
+ if (checksum != expected_checksum) {
+ umode_printf(
+ "[Task B] FAIL: Checksum mismatch! "
+ "Expected %u, got %u\n",
+ (unsigned int) expected_checksum, (unsigned int) checksum);
+ tests_failed++;
+ tasks_completed++;
+ while (1)
+ sys_tyield();
+ }
+ }
+
+ umode_printf("[Task B] PASS: Stack integrity and checksum verified\n");
+ tests_passed++;
+ tasks_completed++;
+
+ while (1) {
+ for (int i = 0; i < 20; i++)
+ sys_tyield();
+ }
+}
+
+/* Task C: Stack integrity with array operations */
+void task_c_integrity(void)
+{
+ int my_tid = sys_tid();
+ umode_printf("[Task C] TID=%d starting (U-mode)\n", my_tid);
+
+ volatile uint32_t stack_array[4] = {STACK_MAGIC_C, STACK_MAGIC_C + 1,
+ STACK_MAGIC_C + 2, STACK_MAGIC_C + 3};
+
+ umode_printf("[Task C] Stack array at %p\n", (void *) stack_array);
+
+ for (int i = 0; i < MAX_ITERATIONS; i++) {
+ sys_tyield();
+
+ for (int j = 0; j < 4; j++) {
+ uint32_t expected = STACK_MAGIC_C + j;
+ if (stack_array[j] != expected) {
+ umode_printf(
+ "[Task C] FAIL: Array[%d] corrupted! "
+ "Expected 0x%08x, got 0x%08x\n",
+ j, (unsigned int) expected, (unsigned int) stack_array[j]);
+ tests_failed++;
+ tasks_completed++;
+ while (1)
+ sys_tyield();
+ }
+ }
+ }
+
+ umode_printf("[Task C] PASS: Stack array integrity verified\n");
+ tests_passed++;
+ tasks_completed++;
+
+ while (1) {
+ for (int i = 0; i < 20; i++)
+ sys_tyield();
+ }
+}
+
+/* ========================================================================
+ * Test 2a: Kernel Protection (Destructive - Triggers Fault)
+ * ======================================================================== */
+
+/* U-mode write to kernel memory (triggers PMP fault) */
+void task_kernel_attack(void)
+{
+ sys_tdelay(50); /* Wait for Test 1 to complete */
+
+ umode_printf("\n=== Test 2a: Kernel Protection ===\n");
+ umode_printf("Attempting to write to kernel .text at %p\n",
+ (void *) &_stext);
+ umode_printf("Expected: [PMP] Task terminated\n");
+ umode_printf("\nResult:\n");
+
+ sys_tdelay(10);
+
+ volatile uint32_t *kernel_addr = (volatile uint32_t *) &_stext;
+ *kernel_addr = 0xDEADBEEF;
+
+ /* Should not reach here - PMP should terminate this task */
+ umode_printf("FAIL: Successfully wrote to kernel memory!\n");
+ tests_failed++;
+
+ while (1)
+ sys_tyield();
+}
+
+/* ========================================================================
+ * Test 2b: Inter-Task Isolation (Destructive - Triggers Fault)
+ * ======================================================================== */
+
+/* U-mode task attempts to read another task's stack (triggers PMP fault) */
+void task_cross_attack(void)
+{
+ /* Wait for Task B to export its stack address */
+ while (!task_b_stack_addr)
+ sys_tyield();
+
+ sys_tdelay(70); /* Wait for Test 2a to complete */
+
+ umode_printf("\n=== Test 2b: Inter-Task Isolation ===\n");
+ umode_printf("Attempting to read Task B's stack at %p\n",
+ (void *) task_b_stack_addr);
+ umode_printf("Expected: [PMP] Task terminated\n");
+ umode_printf("\nResult:\n");
+
+ sys_tdelay(10);
+
+ /* Attempt to read Task B's stack - should trigger PMP fault */
+ volatile uint32_t stolen_value = *task_b_stack_addr;
+
+ /* Should not reach here - PMP should terminate this task */
+ umode_printf("FAIL: Successfully read Task B's stack! Value: 0x%08x\n",
+ (unsigned int) stolen_value);
+ tests_failed++;
+
+ while (1)
+ sys_tyield();
+}
+
+/* ========================================================================
+ * Monitor Task
+ * ======================================================================== */
+
+void monitor_task(void)
+{
+ umode_printf("\n");
+ umode_printf("=================================================\n");
+ umode_printf(" PMP Memory Isolation Test Suite\n");
+ umode_printf("=================================================\n");
+ umode_printf("Tests:\n");
+ umode_printf(" [Test 1] Context Switch & Stack Integrity\n");
+ umode_printf(" [Test 2a] Kernel Protection\n");
+ umode_printf(" [Test 2b] Inter-Task Isolation\n");
+ umode_printf("=================================================\n");
+
+ /* Wait for Test 1 tasks to complete */
+ int cycles = 0;
+ while (tasks_completed < 3 && cycles < 200) {
+ cycles++;
+ for (int i = 0; i < 10; i++)
+ sys_tyield();
+ }
+
+ /* Report Test 1 results */
+ umode_printf("\n=== Test 1: Context Switch & Stack Integrity ===\n");
+ umode_printf("Tasks: %d/3, Passed: %d, Failed: %d\n", tasks_completed,
+ tests_passed, tests_failed);
+
+ if (tasks_completed == 3 && tests_passed == 3 && tests_failed == 0) {
+ umode_printf("Status: PASS\n");
+ } else {
+ umode_printf("Status: FAIL\n");
+ }
+
+ /* Wait for Test 2a and 2b to complete */
+ int failed_before = tests_failed;
+ sys_tdelay(150);
+
+ /* Verify Test 2 results - if tests_failed didn't increase, PMP worked */
+ if (tests_failed == failed_before) {
+ umode_printf("\nStatus: PASS\n");
+ } else {
+ umode_printf("\nStatus: FAIL\n");
+ }
+
+ /* Final summary */
+ umode_printf("\n=================================================\n");
+ if (tests_failed == 0) {
+ umode_printf("ALL PMP TESTS PASSED\n");
+ } else {
+ umode_printf("PMP TESTS FAILED: %d test(s) failed\n", tests_failed);
+ }
+ umode_printf("=================================================\n");
+
+ while (1) {
+ for (int i = 0; i < 50; i++)
+ sys_tyield();
+ }
+}
+
+/* ========================================================================
+ * Application Entry Point
+ * ======================================================================== */
+
+int32_t app_main(void)
+{
+ printf("\nKernel memory regions:\n");
+ printf(" .text: %p to %p\n", (void *) &_stext, (void *) &_etext);
+ printf(" .data: %p to %p\n", (void *) &_sdata, (void *) &_edata);
+
+ printf("\nCreating U-mode test tasks...\n");
+
+ /* Create Test 1 tasks - Context Switch & Stack Integrity */
+ int32_t task_a = mo_task_spawn_user(task_a_integrity, 1024);
+ int32_t task_b = mo_task_spawn_user(task_b_integrity, 1024);
+ int32_t task_c = mo_task_spawn_user(task_c_integrity, 1024);
+ int32_t monitor = mo_task_spawn_user(monitor_task, 1024);
+
+ /* Test 2a: Kernel Protection */
+ int32_t kernel_test = mo_task_spawn_user(task_kernel_attack, 1024);
+
+ /* Test 2b: Inter-Task Isolation */
+ int32_t cross_test = mo_task_spawn_user(task_cross_attack, 1024);
+
+ if (task_a < 0 || task_b < 0 || task_c < 0 || monitor < 0 ||
+ kernel_test < 0 || cross_test < 0) {
+ printf("ERROR: Failed to create test tasks\n");
+ return false;
+ }
+
+ printf("\nTasks created:\n");
+ printf(" Monitor: TID %d\n", (int) monitor);
+ printf(" Test 2a (Kernel Protection): TID %d\n", (int) kernel_test);
+ printf(" Test 2b (Inter-Task Isolation): TID %d\n", (int) cross_test);
+
+ return true; /* Enable preemptive scheduling */
+}
diff --git a/app/privilege_switch.c b/app/privilege_switch.c
new file mode 100644
index 00000000..3b786750
--- /dev/null
+++ b/app/privilege_switch.c
@@ -0,0 +1,33 @@
+#include
+
+/* M-mode task: Continuously delays to test M-mode ecall context switch */
+void mmode_task(void)
+{
+ int iteration = 0;
+ while (1) {
+ CRITICAL_ENTER();
+ printf("[M-mode] iteration %d\n", iteration++);
+ CRITICAL_LEAVE();
+ mo_task_delay(2);
+ }
+}
+
+/* U-mode task: Continuously delays to test U-mode syscall and kernel stack */
+void umode_task(void)
+{
+ int iteration = 0;
+ while (1) {
+ umode_printf("[U-mode] iteration %d\n", iteration++);
+ sys_tdelay(2);
+ }
+}
+
+int32_t app_main(void)
+{
+ printf("[Kernel] Privilege Mode Switching Test: M-mode <-> U-mode\n");
+
+ mo_task_spawn(mmode_task, DEFAULT_STACK_SIZE);
+ mo_task_spawn_user(umode_task, DEFAULT_STACK_SIZE);
+
+ return 1;
+}
diff --git a/app/umode.c b/app/umode.c
index 518e111d..a406b90f 100644
--- a/app/umode.c
+++ b/app/umode.c
@@ -1,43 +1,72 @@
#include
-/* U-mode Validation Task
+/* Architecture-specific helper for SP manipulation testing.
+ * Implemented in arch/riscv/entry.c as a naked function.
+ */
+extern uint32_t __switch_sp(uint32_t new_sp);
+
+/* U-mode validation: syscall stability and privilege isolation.
*
- * Integrates two tests into a single task flow to ensure sequential execution:
- * 1. Phase 1: Mechanism Check - Verify syscalls work.
- * 2. Phase 2: Security Check - Verify privileged instructions trigger a trap.
+ * Phase 1: Verify syscalls work under various SP conditions (normal,
+ * malicious). Phase 2: Verify privileged instructions trap.
*/
void umode_validation_task(void)
{
- /* --- Phase 1: Mechanism Check (Syscalls) --- */
- umode_printf("[umode] Phase 1: Testing Syscall Mechanism\n");
+ /* --- Phase 1: Kernel Stack Isolation Test --- */
+ umode_printf("[umode] Phase 1: Testing Kernel Stack Isolation\n");
+ umode_printf("\n");
- /* Test 1: sys_tid() - Simplest read-only syscall. */
+ /* Test 1a: Baseline - Syscall with normal SP */
+ umode_printf("[umode] Test 1a: sys_tid() with normal SP\n");
int my_tid = sys_tid();
if (my_tid > 0) {
umode_printf("[umode] PASS: sys_tid() returned %d\n", my_tid);
} else {
umode_printf("[umode] FAIL: sys_tid() failed (ret=%d)\n", my_tid);
}
+ umode_printf("\n");
+
+ /* Test 1b: Verify ISR uses mscratch, not malicious user SP */
+ umode_printf("[umode] Test 1b: sys_tid() with malicious SP\n");
+
+ uint32_t saved_sp = __switch_sp(0xDEADBEEF);
+ int my_tid_bad_sp = sys_tid();
+ __switch_sp(saved_sp);
+
+ if (my_tid_bad_sp > 0) {
+ umode_printf(
+ "[umode] PASS: sys_tid() succeeded, ISR correctly used kernel "
+ "stack\n");
+ } else {
+ umode_printf(
+ "[umode] FAIL: Syscall failed with malicious SP (ret=%d)\n",
+ my_tid_bad_sp);
+ }
+ umode_printf("\n");
- /* Test 2: sys_uptime() - Verify value transmission is correct. */
+ /* Test 1c: Verify syscall functionality is still intact */
+ umode_printf("[umode] Test 1c: sys_uptime() with normal SP\n");
int uptime = sys_uptime();
if (uptime >= 0) {
umode_printf("[umode] PASS: sys_uptime() returned %d\n", uptime);
} else {
umode_printf("[umode] FAIL: sys_uptime() failed (ret=%d)\n", uptime);
}
+ umode_printf("\n");
- /* Note: Skipping sys_tadd for now, as kernel user pointer checks might
- * block function pointers in the .text segment, avoiding distraction.
- */
+ umode_printf(
+ "[umode] Phase 1 Complete: Kernel stack isolation validated\n");
+ umode_printf("\n");
/* --- Phase 2: Security Check (Privileged Access) --- */
umode_printf("[umode] ========================================\n");
+ umode_printf("\n");
umode_printf("[umode] Phase 2: Testing Security Isolation\n");
+ umode_printf("\n");
umode_printf(
"[umode] Action: Attempting to read 'mstatus' CSR from U-mode.\n");
umode_printf("[umode] Expect: Kernel Panic with 'Illegal instruction'.\n");
- umode_printf("[umode] ========================================\n");
+ umode_printf("\n");
/* CRITICAL: Delay before suicide to ensure logs are flushed from
* buffer to UART.
diff --git a/arch/riscv/boot.c b/arch/riscv/boot.c
index 8e46f4c9..89d3f331 100644
--- a/arch/riscv/boot.c
+++ b/arch/riscv/boot.c
@@ -16,9 +16,12 @@ extern uint32_t _sbss, _ebss;
/* C entry points */
void main(void);
-void do_trap(uint32_t cause, uint32_t epc);
+void do_trap(uint32_t cause, uint32_t epc, uint32_t isr_sp);
void hal_panic(void);
+/* Current task's kernel stack top (set by dispatcher, NULL for M-mode tasks) */
+extern void *current_kernel_stack_top;
+
/* Machine-mode entry point ('_entry'). This is the first code executed on
* reset. It performs essential low-level setup of the processor state,
* initializes memory, and then jumps to the C-level main function.
@@ -93,22 +96,29 @@ __attribute__((naked, section(".text.prologue"))) void _entry(void)
: "memory");
}
-/* Size of the full trap context frame saved on the stack by the ISR.
- * 30 GPRs (x1, x3-x31) + mcause + mepc + mstatus = 33 words * 4 bytes = 132
- * bytes. Round up to 144 bytes for 16-byte alignment.
+/* ISR trap frame layout (144 bytes = 36 words).
+ * [0-29]: GPRs (ra, gp, tp, t0-t6, s0-s11, a0-a7)
+ * [30]: mcause
+ * [31]: mepc
+ * [32]: mstatus
+ * [33]: SP (user SP in U-mode, original SP in M-mode)
*/
#define ISR_CONTEXT_SIZE 144
-/* Low-level Interrupt Service Routine (ISR) trampoline.
- *
- * This is the common entry point for all traps. It performs a FULL context
- * save, creating a complete trap frame on the stack. This makes the C handler
- * robust, as it does not need to preserve any registers itself.
- */
+/* Low-level ISR common entry for all traps with full context save */
__attribute__((naked, aligned(4))) void _isr(void)
{
asm volatile(
- /* Allocate stack frame for full context save */
+ /* Blind swap with mscratch for kernel stack isolation.
+ * Convention: M-mode (mscratch=0, SP=kernel), U-mode (mscratch=kernel,
+ * SP=user). After swap: SP != 0 means the trap came from U-mode, SP == 0 means M-mode.
+ */
+ "csrrw sp, mscratch, sp\n"
+ "bnez sp, .Lumode_entry\n"
+
+ /* Undo swap and continue for M-mode */
+ "csrrw sp, mscratch, sp\n"
+
"addi sp, sp, -%0\n"
/* Save all general-purpose registers except x0 (zero) and x2 (sp).
@@ -120,7 +130,7 @@ __attribute__((naked, aligned(4))) void _isr(void)
* 48: a4, 52: a5, 56: a6, 60: a7, 64: s2, 68: s3
* 72: s4, 76: s5, 80: s6, 84: s7, 88: s8, 92: s9
* 96: s10, 100:s11, 104:t3, 108: t4, 112: t5, 116: t6
- * 120: mcause, 124: mepc
+ * 120: mcause, 124: mepc, 128: mstatus, 132: SP
*/
"sw ra, 0*4(sp)\n"
"sw gp, 1*4(sp)\n"
@@ -153,33 +163,158 @@ __attribute__((naked, aligned(4))) void _isr(void)
"sw t5, 28*4(sp)\n"
"sw t6, 29*4(sp)\n"
- /* Save trap-related CSRs and prepare arguments for do_trap */
+ /* Save original SP before frame allocation */
+ "addi t0, sp, %0\n"
+ "sw t0, 33*4(sp)\n"
+
+ /* Save machine CSRs (mcause, mepc, mstatus) */
"csrr a0, mcause\n"
"csrr a1, mepc\n"
- "csrr a2, mstatus\n" /* For context switching in privilege change */
-
+ "csrr a2, mstatus\n"
"sw a0, 30*4(sp)\n"
"sw a1, 31*4(sp)\n"
"sw a2, 32*4(sp)\n"
- "mv a2, sp\n" /* a2 = isr_sp */
-
- /* Call the high-level C trap handler.
- * Returns: a0 = SP to use for restoring context (may be different
- * task's stack if context switch occurred).
- */
+ /* Call trap handler with frame pointer */
+ "mv a2, sp\n"
"call do_trap\n"
+ "mv sp, a0\n"
+
+ /* Load mstatus and extract MPP to determine M-mode or U-mode return
+ path */
+ "lw t0, 32*4(sp)\n"
+ "csrw mstatus, t0\n"
+
+ "srli t1, t0, 11\n"
+ "andi t1, t1, 0x3\n"
+ "beqz t1, .Lrestore_umode\n"
+
+ /* M-mode restore */
+ ".Lrestore_mmode:\n"
+ "csrw mscratch, zero\n"
+
+ "lw t1, 31*4(sp)\n" /* Restore mepc */
+ "csrw mepc, t1\n"
+
+ /* Restore all GPRs */
+ "lw ra, 0*4(sp)\n"
+ "lw gp, 1*4(sp)\n"
+ "lw tp, 2*4(sp)\n"
+ "lw t0, 3*4(sp)\n"
+ "lw t1, 4*4(sp)\n"
+ "lw t2, 5*4(sp)\n"
+ "lw s0, 6*4(sp)\n"
+ "lw s1, 7*4(sp)\n"
+ "lw a0, 8*4(sp)\n"
+ "lw a1, 9*4(sp)\n"
+ "lw a2, 10*4(sp)\n"
+ "lw a3, 11*4(sp)\n"
+ "lw a4, 12*4(sp)\n"
+ "lw a5, 13*4(sp)\n"
+ "lw a6, 14*4(sp)\n"
+ "lw a7, 15*4(sp)\n"
+ "lw s2, 16*4(sp)\n"
+ "lw s3, 17*4(sp)\n"
+ "lw s4, 18*4(sp)\n"
+ "lw s5, 19*4(sp)\n"
+ "lw s6, 20*4(sp)\n"
+ "lw s7, 21*4(sp)\n"
+ "lw s8, 22*4(sp)\n"
+ "lw s9, 23*4(sp)\n"
+ "lw s10, 24*4(sp)\n"
+ "lw s11, 25*4(sp)\n"
+ "lw t3, 26*4(sp)\n"
+ "lw t4, 27*4(sp)\n"
+ "lw t5, 28*4(sp)\n"
+ "lw t6, 29*4(sp)\n"
+
+ /* Restore SP from frame[33] */
+ "lw sp, 33*4(sp)\n"
- /* Use returned SP for context restore (enables context switching) */
+ /* Return from trap */
+ "mret\n"
+
+ /* U-mode entry receives kernel stack in SP and user SP in mscratch */
+ ".Lumode_entry:\n"
+ "addi sp, sp, -%0\n"
+
+ /* Save t6 first to preserve it before using it as scratch */
+ "sw t6, 29*4(sp)\n"
+
+ /* Retrieve user SP from mscratch into t6 and save it */
+ "csrr t6, mscratch\n"
+ "sw t6, 33*4(sp)\n"
+
+ /* Save remaining GPRs */
+ "sw ra, 0*4(sp)\n"
+ "sw gp, 1*4(sp)\n"
+ "sw tp, 2*4(sp)\n"
+ "sw t0, 3*4(sp)\n"
+ "sw t1, 4*4(sp)\n"
+ "sw t2, 5*4(sp)\n"
+ "sw s0, 6*4(sp)\n"
+ "sw s1, 7*4(sp)\n"
+ "sw a0, 8*4(sp)\n"
+ "sw a1, 9*4(sp)\n"
+ "sw a2, 10*4(sp)\n"
+ "sw a3, 11*4(sp)\n"
+ "sw a4, 12*4(sp)\n"
+ "sw a5, 13*4(sp)\n"
+ "sw a6, 14*4(sp)\n"
+ "sw a7, 15*4(sp)\n"
+ "sw s2, 16*4(sp)\n"
+ "sw s3, 17*4(sp)\n"
+ "sw s4, 18*4(sp)\n"
+ "sw s5, 19*4(sp)\n"
+ "sw s6, 20*4(sp)\n"
+ "sw s7, 21*4(sp)\n"
+ "sw s8, 22*4(sp)\n"
+ "sw s9, 23*4(sp)\n"
+ "sw s10, 24*4(sp)\n"
+ "sw s11, 25*4(sp)\n"
+ "sw t3, 26*4(sp)\n"
+ "sw t4, 27*4(sp)\n"
+ "sw t5, 28*4(sp)\n"
+ /* t6 already saved */
+
+ /* Save CSRs */
+ "csrr a0, mcause\n"
+ "csrr a1, mepc\n"
+ "csrr a2, mstatus\n"
+ "sw a0, 30*4(sp)\n"
+ "sw a1, 31*4(sp)\n"
+ "sw a2, 32*4(sp)\n"
+
+ "mv a2, sp\n" /* a2 = ISR frame pointer */
+ "call do_trap\n"
"mv sp, a0\n"
- /* Restore mstatus from frame[32] */
+ /* Check MPP in mstatus to determine return path */
"lw t0, 32*4(sp)\n"
"csrw mstatus, t0\n"
- /* Restore mepc from frame[31] (might have been modified by handler) */
+ "srli t1, t0, 11\n"
+ "andi t1, t1, 0x3\n"
+ "bnez t1, .Lrestore_mmode\n"
+
+ /* U-mode restore path: restore mepc, then set up mscratch for the next trap */
+ ".Lrestore_umode:\n"
"lw t1, 31*4(sp)\n"
"csrw mepc, t1\n"
+
+ /* Setup mscratch = kernel stack for next trap entry.
+ * U-mode convention: mscratch holds kernel stack, SP holds user stack.
+ * On next trap, csrrw will swap them: SP gets kernel, mscratch gets
+ * user. Load current task's kernel stack top (set by dispatcher).
+ */
+ "la t0, current_kernel_stack_top\n"
+ "lw t0, 0(t0)\n" /* t0 = *current_kernel_stack_top */
+ "bnez t0, 1f\n" /* If non-NULL, use it */
+ "la t0, _stack\n" /* Fallback to global stack if NULL */
+ "1:\n"
+ "csrw mscratch, t0\n"
+
+ /* Restore all GPRs */
"lw ra, 0*4(sp)\n"
"lw gp, 1*4(sp)\n"
"lw tp, 2*4(sp)\n"
@@ -211,12 +346,12 @@ __attribute__((naked, aligned(4))) void _isr(void)
"lw t5, 28*4(sp)\n"
"lw t6, 29*4(sp)\n"
- /* Deallocate stack frame */
- "addi sp, sp, %0\n"
+ /* Restore user SP from frame[33] */
+ "lw sp, 33*4(sp)\n"
/* Return from trap */
"mret\n"
- : /* no outputs */
- : "i"(ISR_CONTEXT_SIZE) /* +16 for mcause, mepc, mstatus */
+ : /* no outputs */
+ : "i"(ISR_CONTEXT_SIZE)
: "memory");
}
diff --git a/arch/riscv/build.mk b/arch/riscv/build.mk
index 243a6ea2..19d224dc 100644
--- a/arch/riscv/build.mk
+++ b/arch/riscv/build.mk
@@ -70,7 +70,7 @@ LDFLAGS += --gc-sections
ARFLAGS = r
LDSCRIPT = $(ARCH_DIR)/riscv32-qemu.ld
-HAL_OBJS := boot.o hal.o muldiv.o
+HAL_OBJS := boot.o hal.o muldiv.o pmp.o
HAL_OBJS := $(addprefix $(BUILD_KERNEL_DIR)/,$(HAL_OBJS))
deps += $(HAL_OBJS:%.o=%.o.d)
diff --git a/arch/riscv/csr.h b/arch/riscv/csr.h
index 2f27ed81..081c2c7c 100644
--- a/arch/riscv/csr.h
+++ b/arch/riscv/csr.h
@@ -179,3 +179,82 @@
/* Machine Scratch Register - For temporary storage during traps */
#define CSR_MSCRATCH 0x340
+
+/* PMP Address Registers (pmpaddr0-pmpaddr15) - 16 regions maximum
+ * In TOR (Top-of-Range) mode, these define the upper boundary of each region.
+ * The lower boundary is defined by the previous region's upper boundary.
+ */
+#define CSR_PMPADDR0 0x3b0
+#define CSR_PMPADDR1 0x3b1
+#define CSR_PMPADDR2 0x3b2
+#define CSR_PMPADDR3 0x3b3
+#define CSR_PMPADDR4 0x3b4
+#define CSR_PMPADDR5 0x3b5
+#define CSR_PMPADDR6 0x3b6
+#define CSR_PMPADDR7 0x3b7
+#define CSR_PMPADDR8 0x3b8
+#define CSR_PMPADDR9 0x3b9
+#define CSR_PMPADDR10 0x3ba
+#define CSR_PMPADDR11 0x3bb
+#define CSR_PMPADDR12 0x3bc
+#define CSR_PMPADDR13 0x3bd
+#define CSR_PMPADDR14 0x3be
+#define CSR_PMPADDR15 0x3bf
+
+/* PMP Configuration Registers (pmpcfg0-pmpcfg3)
+ * Each configuration register controls 4 PMP regions (on RV32).
+ * pmpcfg0 controls pmpaddr0-3, pmpcfg1 controls pmpaddr4-7, etc.
+ */
+#define CSR_PMPCFG0 0x3a0
+#define CSR_PMPCFG1 0x3a1
+#define CSR_PMPCFG2 0x3a2
+#define CSR_PMPCFG3 0x3a3
+
+/* PMP Configuration Field Bits (8 bits per region within pmpcfg)
+ * Layout in each byte of pmpcfg:
+ * Bit 7: L (Lock) - Locks this region until hardware reset
+ * Bits 6-5: Reserved
+ * Bits 4-3: A (Address Matching Mode)
+ * Bit 2: X (Execute permission)
+ * Bit 1: W (Write permission)
+ * Bit 0: R (Read permission)
+ */
+
+/* Lock bit: Prevents further modification of this region */
+#define PMPCFG_L (1U << 7)
+
+/* Address Matching Mode (bits 3-4)
+ * Choose TOR mode for no alignment requirements on region sizes, and support
+ * for arbitrary address ranges.
+ */
+#define PMPCFG_A_SHIFT 3
+#define PMPCFG_A_MASK (0x3U << PMPCFG_A_SHIFT)
+#define PMPCFG_A_OFF (0x0U << PMPCFG_A_SHIFT) /* Null region (disabled) */
+#define PMPCFG_A_TOR (0x1U << PMPCFG_A_SHIFT) /* Top-of-Range mode */
+
+/* Permission bits */
+#define PMPCFG_X (1U << 2) /* Execute permission */
+#define PMPCFG_W (1U << 1) /* Write permission */
+#define PMPCFG_R (1U << 0) /* Read permission */
+
+/* Common permission combinations */
+#define PMPCFG_PERM_NONE (0x0U) /* No access */
+#define PMPCFG_PERM_R (PMPCFG_R) /* Read-only */
+#define PMPCFG_PERM_RW (PMPCFG_R | PMPCFG_W) /* Read-Write */
+#define PMPCFG_PERM_X (PMPCFG_X) /* Execute-only */
+#define PMPCFG_PERM_RX (PMPCFG_R | PMPCFG_X) /* Read-Execute */
+#define PMPCFG_PERM_RWX (PMPCFG_R | PMPCFG_W | PMPCFG_X) /* All access */
+
+/* Utility macros for PMP configuration manipulation */
+
+/* Extract PMP address matching mode */
+#define PMPCFG_GET_A(cfg) (((cfg) & PMPCFG_A_MASK) >> PMPCFG_A_SHIFT)
+
+/* Extract permission bits from configuration byte */
+#define PMPCFG_GET_PERM(cfg) ((cfg) & (PMPCFG_R | PMPCFG_W | PMPCFG_X))
+
+/* Check if region is locked */
+#define PMPCFG_IS_LOCKED(cfg) (((cfg) & PMPCFG_L) != 0)
+
+/* Check if region is enabled (address mode is not OFF) */
+#define PMPCFG_IS_ENABLED(cfg) (PMPCFG_GET_A(cfg) != PMPCFG_A_OFF)
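+
+/* Usage example (illustrative): a read-execute TOR region is described by a
+ * single configuration byte,
+ *
+ *     uint8_t cfg = PMPCFG_A_TOR | PMPCFG_PERM_RX;
+ *
+ * which is then placed into byte (i % 4) of pmpcfg(i / 4) for PMP entry i on
+ * RV32, alongside the corresponding pmpaddr(i) upper-bound address.
+ */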
diff --git a/arch/riscv/entry.c b/arch/riscv/entry.c
index 9956558e..da9a53b4 100644
--- a/arch/riscv/entry.c
+++ b/arch/riscv/entry.c
@@ -15,6 +15,7 @@
*/
#include
+#include
/* Architecture-specific syscall implementation using ecall trap.
* This overrides the weak symbol defined in kernel/syscall.c.
@@ -40,3 +41,26 @@ int syscall(int num, void *arg1, void *arg2, void *arg3)
return a0;
}
+
+/* Stack Pointer Swap for Testing
+ *
+ * This naked function provides atomic SP swapping for kernel validation tests.
+ * Using __attribute__((naked)) ensures the compiler does not generate any
+ * prologue/epilogue code that would use the stack, and prevents instruction
+ * reordering that could break the swap semantics.
+ *
+ * Inspired by Linux kernel's __switch_to for context switching.
+ */
+
+/* Atomically swap the stack pointer with a new value.
+ * @new_sp: New stack pointer value to install (in a0)
+ * @return: Previous stack pointer value (in a0)
+ */
+__attribute__((naked)) uint32_t __switch_sp(uint32_t new_sp)
+{
+ asm volatile(
+ "mv t0, sp \n" /* Save current SP to temporary */
+ "mv sp, a0 \n" /* Install new SP from argument */
+ "mv a0, t0 \n" /* Return old SP in a0 */
+ "ret \n");
+}
diff --git a/arch/riscv/hal.c b/arch/riscv/hal.c
index 7ad5806f..9b683ccc 100644
--- a/arch/riscv/hal.c
+++ b/arch/riscv/hal.c
@@ -3,6 +3,7 @@
#include
#include "csr.h"
+#include "pmp.h"
#include "private/stdio.h"
#include "private/utils.h"
@@ -48,39 +49,40 @@
* Indices are in word offsets (divide byte offset by 4).
*/
enum {
- FRAME_RA = 0, /* x1 - Return Address */
- FRAME_GP = 1, /* x3 - Global Pointer */
- FRAME_TP = 2, /* x4 - Thread Pointer */
- FRAME_T0 = 3, /* x5 - Temporary register 0 */
- FRAME_T1 = 4, /* x6 - Temporary register 1 */
- FRAME_T2 = 5, /* x7 - Temporary register 2 */
- FRAME_S0 = 6, /* x8 - Saved register 0 / Frame Pointer */
- FRAME_S1 = 7, /* x9 - Saved register 1 */
- FRAME_A0 = 8, /* x10 - Argument/Return 0 */
- FRAME_A1 = 9, /* x11 - Argument/Return 1 */
- FRAME_A2 = 10, /* x12 - Argument 2 */
- FRAME_A3 = 11, /* x13 - Argument 3 */
- FRAME_A4 = 12, /* x14 - Argument 4 */
- FRAME_A5 = 13, /* x15 - Argument 5 */
- FRAME_A6 = 14, /* x16 - Argument 6 */
- FRAME_A7 = 15, /* x17 - Argument 7 / Syscall Number */
- FRAME_S2 = 16, /* x18 - Saved register 2 */
- FRAME_S3 = 17, /* x19 - Saved register 3 */
- FRAME_S4 = 18, /* x20 - Saved register 4 */
- FRAME_S5 = 19, /* x21 - Saved register 5 */
- FRAME_S6 = 20, /* x22 - Saved register 6 */
- FRAME_S7 = 21, /* x23 - Saved register 7 */
- FRAME_S8 = 22, /* x24 - Saved register 8 */
- FRAME_S9 = 23, /* x25 - Saved register 9 */
- FRAME_S10 = 24, /* x26 - Saved register 10 */
- FRAME_S11 = 25, /* x27 - Saved register 11 */
- FRAME_T3 = 26, /* x28 - Temporary register 3 */
- FRAME_T4 = 27, /* x29 - Temporary register 4 */
- FRAME_T5 = 28, /* x30 - Temporary register 5 */
- FRAME_T6 = 29, /* x31 - Temporary register 6 */
- FRAME_MCAUSE = 30, /* Machine Cause CSR */
- FRAME_EPC = 31, /* Machine Exception PC (mepc) */
- FRAME_MSTATUS = 32 /* Machine Status CSR */
+ FRAME_RA = 0, /* x1 - Return Address */
+ FRAME_GP = 1, /* x3 - Global Pointer */
+ FRAME_TP = 2, /* x4 - Thread Pointer */
+ FRAME_T0 = 3, /* x5 - Temporary register 0 */
+ FRAME_T1 = 4, /* x6 - Temporary register 1 */
+ FRAME_T2 = 5, /* x7 - Temporary register 2 */
+ FRAME_S0 = 6, /* x8 - Saved register 0 / Frame Pointer */
+ FRAME_S1 = 7, /* x9 - Saved register 1 */
+ FRAME_A0 = 8, /* x10 - Argument/Return 0 */
+ FRAME_A1 = 9, /* x11 - Argument/Return 1 */
+ FRAME_A2 = 10, /* x12 - Argument 2 */
+ FRAME_A3 = 11, /* x13 - Argument 3 */
+ FRAME_A4 = 12, /* x14 - Argument 4 */
+ FRAME_A5 = 13, /* x15 - Argument 5 */
+ FRAME_A6 = 14, /* x16 - Argument 6 */
+ FRAME_A7 = 15, /* x17 - Argument 7 / Syscall Number */
+ FRAME_S2 = 16, /* x18 - Saved register 2 */
+ FRAME_S3 = 17, /* x19 - Saved register 3 */
+ FRAME_S4 = 18, /* x20 - Saved register 4 */
+ FRAME_S5 = 19, /* x21 - Saved register 5 */
+ FRAME_S6 = 20, /* x22 - Saved register 6 */
+ FRAME_S7 = 21, /* x23 - Saved register 7 */
+ FRAME_S8 = 22, /* x24 - Saved register 8 */
+ FRAME_S9 = 23, /* x25 - Saved register 9 */
+ FRAME_S10 = 24, /* x26 - Saved register 10 */
+ FRAME_S11 = 25, /* x27 - Saved register 11 */
+ FRAME_T3 = 26, /* x28 - Temporary register 3 */
+ FRAME_T4 = 27, /* x29 - Temporary register 4 */
+ FRAME_T5 = 28, /* x30 - Temporary register 5 */
+ FRAME_T6 = 29, /* x31 - Temporary register 6 */
+ FRAME_MCAUSE = 30, /* Machine Cause CSR */
+ FRAME_EPC = 31, /* Machine Exception PC (mepc) */
+ FRAME_MSTATUS = 32, /* Machine Status CSR */
+ FRAME_SP = 33 /* Stack Pointer (saved for restore) */
};
/* Global variable to hold the new stack pointer for pending context switch.
@@ -96,6 +98,24 @@ static void *pending_switch_sp = NULL;
*/
static uint32_t current_isr_frame_sp = 0;
+
+/* Trap nesting depth counter to prevent inner traps from overwriting
+ * current_isr_frame_sp. Only the outermost trap should set the ISR frame
+ * pointer that context switching requires.
+ *
+ * Exported to allow trap context detection and avoid unnecessary nested
+ * trap triggering.
+ */
+uint32_t trap_nesting_depth = 0;
+
+/* Current task's kernel stack top address for U-mode trap entry.
+ * For U-mode tasks: points to (kernel_stack + kernel_stack_size).
+ * For M-mode tasks: NULL (uses global _stack).
+ * Updated by dispatcher during context switches.
+ * The ISR restore path loads this into mscratch before mret.
+ */
+void *current_kernel_stack_top = NULL;
+
/* NS16550A UART0 - Memory-mapped registers for the QEMU 'virt' machine's serial
* port.
*/
@@ -273,27 +293,18 @@ static void uart_init(uint32_t baud)
void hal_hardware_init(void)
{
uart_init(USART_BAUD);
+
+ /* Initialize PMP hardware with kernel memory regions */
+ pmp_config_t *pmp_config = pmp_get_config();
+ if (pmp_init_kernel(pmp_config) != 0)
+ hal_panic();
+
/* Set the first timer interrupt. Subsequent interrupts are set in ISR */
mtimecmp_w(mtime_r() + (F_CPU / F_TIMER));
/* Install low-level I/O handlers for the C standard library */
_stdout_install(__putchar);
_stdin_install(__getchar);
_stdpoll_install(__kbhit);
-
- /* Grant U-mode access to all memory for validation purposes.
- * By default, RISC-V PMP denies all access to U-mode, which would cause
- * instruction access faults immediately upon task switch. This minimal
- * setup allows U-mode tasks to execute and serves as a placeholder until
- * the full PMP driver is integrated.
- */
- uint32_t pmpaddr = -1UL; /* Cover entire address space */
- uint8_t pmpcfg = 0x0F; /* TOR, R, W, X enabled */
-
- asm volatile(
- "csrw pmpaddr0, %0\n"
- "csrw pmpcfg0, %1\n"
- :
- : "r"(pmpaddr), "r"(pmpcfg));
}
/* Halts the system in an unrecoverable state */
@@ -354,11 +365,18 @@ static const char *exc_msg[] = {
*/
uint32_t do_trap(uint32_t cause, uint32_t epc, uint32_t isr_sp)
{
+ uint32_t ret_sp; /* Return value - SP to use for context restore */
+
/* Reset pending switch at start of every trap */
pending_switch_sp = NULL;
- /* Store ISR frame SP so hal_switch_stack() can save it to prev task */
- current_isr_frame_sp = isr_sp;
+ /* Only the outermost trap sets the ISR frame pointer for context
+ * switching. Inner traps must not overwrite this value.
+ */
+ if (trap_nesting_depth == 0) {
+ current_isr_frame_sp = isr_sp;
+ }
+ trap_nesting_depth++;
if (MCAUSE_IS_INTERRUPT(cause)) { /* Asynchronous Interrupt */
uint32_t int_code = MCAUSE_GET_CODE(cause);
@@ -370,6 +388,15 @@ uint32_t do_trap(uint32_t cause, uint32_t epc, uint32_t isr_sp)
mtimecmp_w(mtimecmp_r() + (F_CPU / F_TIMER));
/* Invoke scheduler - parameter 1 = from timer, increment ticks */
dispatcher(1);
+
+ /* Nested traps must return their own SP to unwind properly.
+ * Only the outermost trap performs context switch restoration.
+ */
+ if (trap_nesting_depth > 1) {
+ pending_switch_sp = NULL;
+ ret_sp = isr_sp;
+ goto trap_exit;
+ }
} else {
/* All other interrupt sources are unexpected and fatal */
hal_panic();
@@ -379,18 +406,23 @@ uint32_t do_trap(uint32_t cause, uint32_t epc, uint32_t isr_sp)
/* Handle ecall from U-mode - system calls */
if (code == MCAUSE_ECALL_UMODE) {
+ /* Extract syscall arguments from ISR frame */
+ uint32_t *f = (uint32_t *) isr_sp;
+
/* Advance mepc past the ecall instruction (4 bytes) */
uint32_t new_epc = epc + 4;
write_csr(mepc, new_epc);
- /* Extract syscall arguments from ISR frame */
- uint32_t *f = (uint32_t *) isr_sp;
-
int syscall_num = f[FRAME_A7];
void *arg1 = (void *) f[FRAME_A0];
void *arg2 = (void *) f[FRAME_A1];
void *arg3 = (void *) f[FRAME_A2];
+ /* Update frame EPC before syscall dispatch to ensure correct return
+ * address if nested traps occur during syscall execution.
+ */
+ f[FRAME_EPC] = new_epc;
+
/* Dispatch to syscall implementation via direct table lookup.
* Must use do_syscall here instead of syscall() to avoid recursive
* traps, as the user-space syscall() may be overridden with ecall.
@@ -398,11 +430,17 @@ uint32_t do_trap(uint32_t cause, uint32_t epc, uint32_t isr_sp)
extern int do_syscall(int num, void *arg1, void *arg2, void *arg3);
int retval = do_syscall(syscall_num, arg1, arg2, arg3);
- /* Store return value and updated PC */
+ /* Store return value */
f[FRAME_A0] = (uint32_t) retval;
- f[FRAME_EPC] = new_epc;
- return isr_sp;
+ /* Return new SP if syscall triggered context switch. Nested traps
+ * return their own SP to properly unwind the call stack.
+ */
+ ret_sp = (trap_nesting_depth > 1)
+ ? isr_sp
+ : (pending_switch_sp ? (uint32_t) pending_switch_sp
+ : isr_sp);
+ goto trap_exit;
}
/* Handle ecall from M-mode - used for yielding in preemptive mode */
@@ -423,8 +461,55 @@ uint32_t do_trap(uint32_t cause, uint32_t epc, uint32_t isr_sp)
*/
dispatcher(0);
- /* Return the SP to use - new task's frame or current frame */
- return pending_switch_sp ? (uint32_t) pending_switch_sp : isr_sp;
+ /* Nested traps must return their own SP to unwind properly.
+ * Only the outermost trap performs context switch restoration.
+ * Clear pending switch for nested traps to prevent incorrect
+ * restoration by outer handlers.
+ */
+ if (trap_nesting_depth > 1) {
+ pending_switch_sp = NULL;
+ ret_sp = isr_sp;
+ } else {
+ ret_sp =
+ pending_switch_sp ? (uint32_t) pending_switch_sp : isr_sp;
+ }
+ goto trap_exit;
+ }
+
+ /* Attempt to recover load/store access faults.
+ *
+ * This assumes all U-mode access faults are PMP-related, which holds
+ * for platforms without MMU where PMP is the sole memory protection.
+ * On MCU hardware, bus faults or access to non-existent memory may
+ * also trigger access exceptions, and terminating the task instead
+ * of panicking could hide such hardware issues.
+ */
+ if (code == 5 || code == 7) {
+ uint32_t mtval = read_csr(mtval);
+ int32_t pmp_result = pmp_handle_access_fault(mtval, code == 7);
+ if (pmp_result == PMP_FAULT_RECOVERED) {
+ /* PMP fault handled successfully, return current frame */
+ ret_sp = isr_sp;
+ goto trap_exit;
+ }
+ if (pmp_result == PMP_FAULT_TERMINATE) {
+ /* Task terminated (marked as zombie), switch to next task.
+ * Print diagnostic before switching. */
+ trap_puts("[PMP] Task terminated: ");
+ trap_puts(code == 7 ? "Store" : "Load");
+ trap_puts(" access fault at 0x");
+ for (int i = 28; i >= 0; i -= 4) {
+ uint32_t nibble = (mtval >> i) & 0xF;
+ _putchar(nibble < 10 ? '0' + nibble : 'A' + nibble - 10);
+ }
+ trap_puts("\r\n");
+
+ /* Force context switch to next task */
+ dispatcher(0);
+ ret_sp =
+ pending_switch_sp ? (uint32_t) pending_switch_sp : isr_sp;
+ goto trap_exit;
+ }
}
/* Print exception info via direct UART (safe in trap context) */
@@ -445,7 +530,12 @@ uint32_t do_trap(uint32_t cause, uint32_t epc, uint32_t isr_sp)
}
/* Return the SP to use for context restore - new task's frame or current */
- return pending_switch_sp ? (uint32_t) pending_switch_sp : isr_sp;
+ ret_sp = pending_switch_sp ? (uint32_t) pending_switch_sp : isr_sp;
+
+trap_exit:
+ /* Decrement trap nesting depth before returning */
+ trap_nesting_depth--;
+ return ret_sp;
}
/* Enables the machine-level timer interrupt source */
@@ -495,33 +585,38 @@ extern uint32_t _gp, _end;
*/
void *hal_build_initial_frame(void *stack_top,
void (*task_entry)(void),
- int user_mode)
+ int user_mode,
+ void *kernel_stack,
+ size_t kernel_stack_size)
{
#define INITIAL_STACK_RESERVE \
256 /* Reserve space below stack_top for task startup */
- /* Place frame deeper in stack so after ISR deallocates (sp += 128),
- * SP will be at (stack_top - INITIAL_STACK_RESERVE), not at stack_top.
+ /* For U-mode tasks, build frame on kernel stack instead of user stack.
+ * For M-mode tasks, build frame on user stack as before.
*/
- uint32_t *frame =
- (uint32_t *) ((uint8_t *) stack_top - INITIAL_STACK_RESERVE -
- ISR_STACK_FRAME_SIZE);
+ uint32_t *frame;
+ if (user_mode && kernel_stack) {
+ /* U-mode: Place frame on kernel stack */
+ void *kstack_top = (uint8_t *) kernel_stack + kernel_stack_size;
+ frame = (uint32_t *) ((uint8_t *) kstack_top - ISR_STACK_FRAME_SIZE);
+ } else {
+ /* M-mode: Place frame on user stack with reserve space */
+ frame = (uint32_t *) ((uint8_t *) stack_top - INITIAL_STACK_RESERVE -
+ ISR_STACK_FRAME_SIZE);
+ }
/* Zero out entire frame */
- for (int i = 0; i < 32; i++) {
+ for (int i = 0; i < 36; i++) {
frame[i] = 0;
}
/* Compute tp value same as boot.c: aligned to 64 bytes from _end */
uint32_t tp_val = ((uint32_t) &_end + 63) & ~63U;
- /* Initialize critical registers for proper task startup:
- * - frame[1] = gp: Global pointer, required for accessing global variables
- * - frame[2] = tp: Thread pointer, required for thread-local storage
- * - frame[32] = mepc: Task entry point, where mret will jump to
- */
- frame[1] = (uint32_t) &_gp; /* gp - global pointer */
- frame[2] = tp_val; /* tp - thread pointer */
+ /* Initialize critical registers for proper task startup */
+ frame[FRAME_GP] = (uint32_t) &_gp; /* gp - global pointer */
+ frame[FRAME_TP] = tp_val; /* tp - thread pointer */
/* Initialize mstatus for new task:
* - MPIE=1: mret will copy this to MIE, enabling interrupts after task
@@ -535,6 +630,19 @@ void *hal_build_initial_frame(void *stack_top,
frame[FRAME_EPC] = (uint32_t) task_entry; /* mepc - entry point */
+ /* SP value for when ISR returns (frame[33] will hold this value).
+ * For U-mode: Set to user stack top (will be saved to frame[33] in ISR).
+ * For M-mode: Set to frame + ISR_STACK_FRAME_SIZE as before.
+ */
+ if (user_mode && kernel_stack) {
+ /* U-mode: frame[33] should contain user SP */
+ frame[FRAME_SP] =
+ (uint32_t) ((uint8_t *) stack_top - INITIAL_STACK_RESERVE);
+ } else {
+ /* M-mode: frame[33] contains kernel SP after frame deallocation */
+ frame[FRAME_SP] = (uint32_t) ((uint8_t *) frame + ISR_STACK_FRAME_SIZE);
+ }
+
return (void *) frame;
}
@@ -744,6 +852,21 @@ void hal_switch_stack(void **old_sp, void *new_sp)
pending_switch_sp = new_sp;
}
+/* Updates the kernel stack top for the current task.
+ * Called by dispatcher during context switch to set up mscratch for next trap.
+ */
+void hal_set_kernel_stack(void *kernel_stack, size_t kernel_stack_size)
+{
+ if (kernel_stack && kernel_stack_size > 0) {
+ /* U-mode task: point to top of per-task kernel stack */
+ current_kernel_stack_top =
+ (void *) ((uint8_t *) kernel_stack + kernel_stack_size);
+ } else {
+ /* M-mode task: NULL signals to use global _stack */
+ current_kernel_stack_top = NULL;
+ }
+}
+
/* Enable interrupts on first run of a task.
* Checks if task's return address still points to entry (meaning it hasn't
* run yet), and if so, enables global interrupts.
@@ -811,7 +934,25 @@ static void __attribute__((naked, used)) __dispatch_init_isr(void)
"lw t0, 32*4(sp)\n"
"csrw mstatus, t0\n"
- /* Restore mepc from frame[31] */
+ /* Initialize mscratch based on MPP field in mstatus.
+ * For M-mode set mscratch to zero, for U-mode set to kernel stack.
+ * ISR uses this to detect privilege mode via blind swap.
+ */
+        "srli t2, t0, 11\n" /* shift MPP field (mstatus[12:11]) down */
+        "andi t2, t2, 0x3\n" /* isolate the two MPP bits */
+ "bnez t2, .Ldispatch_mmode\n"
+
+ /* U-mode path */
+ "la t2, _stack\n"
+ "csrw mscratch, t2\n"
+ "j .Ldispatch_done\n"
+
+ /* M-mode path */
+ ".Ldispatch_mmode:\n"
+ "csrw mscratch, zero\n"
+ ".Ldispatch_done:\n"
+
+ /* Restore mepc */
"lw t1, 31*4(sp)\n"
"csrw mepc, t1\n"
diff --git a/arch/riscv/hal.h b/arch/riscv/hal.h
index 7946a0fe..0b436137 100644
--- a/arch/riscv/hal.h
+++ b/arch/riscv/hal.h
@@ -3,13 +3,21 @@
#include
/* Symbols from the linker script, defining memory boundaries */
-extern uint32_t _stack_start, _stack_end; /* Start/end of the STACK memory */
-extern uint32_t _heap_start, _heap_end; /* Start/end of the HEAP memory */
-extern uint32_t _heap_size; /* Size of HEAP memory */
+extern uint32_t _gp; /* Global pointer initialized at reset */
+extern uint32_t _stack; /* Kernel stack top for ISR and boot */
+extern uint32_t _stext, _etext; /* Start/end of the .text section */
extern uint32_t _sidata; /* Start address for .data initialization */
extern uint32_t _sdata, _edata; /* Start/end address for .data section */
extern uint32_t _sbss, _ebss; /* Start/end address for .bss section */
extern uint32_t _end; /* End of kernel image */
+extern uint32_t _heap_start, _heap_end; /* Start/end of the HEAP memory */
+extern uint32_t _heap_size; /* Size of HEAP memory */
+extern uint32_t _stack_bottom, _stack_top; /* Bottom/top of the STACK memory */
+
+/* Current trap handler nesting depth. Zero when not in trap context,
+ * increments for each nested trap entry, decrements on exit.
+ */
+extern uint32_t trap_nesting_depth;
/* Read a RISC-V Control and Status Register (CSR).
* @reg : The symbolic name of the CSR (e.g., mstatus).
@@ -27,6 +35,25 @@ extern uint32_t _end; /* End of kernel image */
*/
#define write_csr(reg, val) ({ asm volatile("csrw " #reg ", %0" ::"rK"(val)); })
+/* Read CSR by numeric address (for dynamic register selection).
+ * Used when CSR number is not known at compile-time (e.g., PMP registers).
+ * @csr_num : CSR address as a compile-time constant.
+ */
+#define read_csr_num(csr_num) \
+ ({ \
+ uint32_t __tmp; \
+ asm volatile("csrr %0, %1" : "=r"(__tmp) : "i"(csr_num)); \
+ __tmp; \
+ })
+
+/* Write CSR by numeric address (for dynamic register selection).
+ * Used when CSR number is not known at compile-time (e.g., PMP registers).
+ * @csr_num : CSR address as a compile-time constant.
+ * @val : The 32-bit value to write.
+ */
+#define write_csr_num(csr_num, val) \
+ ({ asm volatile("csrw %0, %1" ::"i"(csr_num), "rK"(val)); })
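+
+/* Usage sketch, assuming CSR_PMPCFG0 from csr.h:
+ *   uint32_t cfg0 = read_csr_num(CSR_PMPCFG0);
+ *   write_csr_num(CSR_PMPCFG0, cfg0);
+ */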
+
/* Globally enable or disable machine-level interrupts by setting mstatus.MIE.
* @enable : Non-zero to enable, zero to disable.
* Returns the previous state of the interrupt enable bit (1 if enabled, 0 if
@@ -88,6 +115,13 @@ void hal_dispatch_init(void *ctx);
*/
void hal_switch_stack(void **old_sp, void *new_sp);
+/* Updates the kernel stack top for the current task.
+ * Called by dispatcher during context switch to set up mscratch for next trap.
+ * @kernel_stack: Base address of task's kernel stack (NULL for M-mode tasks)
+ * @kernel_stack_size: Size of kernel stack in bytes (0 for M-mode tasks)
+ */
+void hal_set_kernel_stack(void *kernel_stack, size_t kernel_stack_size);
+
/* Provides a blocking, busy-wait delay.
* This function monopolizes the CPU and should only be used for very short
* delays or in pre-scheduling initialization code.
@@ -112,7 +146,9 @@ void hal_interrupt_tick(void); /* Enable interrupts on first task run */
void *hal_build_initial_frame(
void *stack_top,
void (*task_entry)(void),
- int user_mode); /* Build ISR frame for preemptive mode */
+ int user_mode,
+ void *kernel_stack,
+ size_t kernel_stack_size); /* Build ISR frame for preemptive mode */
/* Initializes the context structure for a new task.
* @ctx : Pointer to jmp_buf to initialize (must be non-NULL).
@@ -135,3 +171,10 @@ void hal_cpu_idle(void);
/* Default stack size for new tasks if not otherwise specified */
#define DEFAULT_STACK_SIZE 8192
+
+/* Physical Memory Protection (PMP) region limit constants */
+#define PMP_MAX_REGIONS 16 /* PMP entries (RV32 typically implements 16) */
+#define PMP_TOR_PAIRS \
+ 8 /* In TOR mode, 16 regions = 8 pairs (uses 2 addrs each) */
+#define MIN_PMP_REGION_SIZE \
+ 4 /* Minimum addressable size in TOR mode (4 bytes) */
diff --git a/arch/riscv/pmp.c b/arch/riscv/pmp.c
new file mode 100644
index 00000000..5e43873e
--- /dev/null
+++ b/arch/riscv/pmp.c
@@ -0,0 +1,734 @@
+/* RISC-V Physical Memory Protection (PMP) Implementation
+ *
+ * Provides hardware-enforced memory isolation using PMP in TOR mode.
+ */
+
+#include
+#include
+#include
+
+#include "csr.h"
+#include "pmp.h"
+#include "private/error.h"
+
+/* PMP CSR Access Helpers
+ *
+ * RISC-V CSR instructions require compile-time constant addresses encoded in
+ * the instruction itself. These helpers use switch-case dispatch to provide
+ * runtime indexed access to PMP configuration and address registers.
+ *
+ * - pmpcfg0-3: Four 32-bit configuration registers (16 regions, 8 bits each)
+ * - pmpaddr0-15: Sixteen address registers for TOR (Top-of-Range) mode
+ */
+
+/* Read PMP configuration register by index (0-3) */
+static uint32_t __attribute__((unused)) read_pmpcfg(uint8_t idx)
+{
+ switch (idx) {
+ case 0:
+ return read_csr_num(CSR_PMPCFG0);
+ case 1:
+ return read_csr_num(CSR_PMPCFG1);
+ case 2:
+ return read_csr_num(CSR_PMPCFG2);
+ case 3:
+ return read_csr_num(CSR_PMPCFG3);
+ default:
+ return 0;
+ }
+}
+
+/* Write PMP configuration register by index (0-3) */
+static void __attribute__((unused)) write_pmpcfg(uint8_t idx, uint32_t val)
+{
+ switch (idx) {
+ case 0:
+ write_csr_num(CSR_PMPCFG0, val);
+ break;
+ case 1:
+ write_csr_num(CSR_PMPCFG1, val);
+ break;
+ case 2:
+ write_csr_num(CSR_PMPCFG2, val);
+ break;
+ case 3:
+ write_csr_num(CSR_PMPCFG3, val);
+ break;
+ }
+}
+
+/* Read PMP address register by index (0-15) */
+static uint32_t __attribute__((unused)) read_pmpaddr(uint8_t idx)
+{
+ switch (idx) {
+ case 0:
+ return read_csr_num(CSR_PMPADDR0);
+ case 1:
+ return read_csr_num(CSR_PMPADDR1);
+ case 2:
+ return read_csr_num(CSR_PMPADDR2);
+ case 3:
+ return read_csr_num(CSR_PMPADDR3);
+ case 4:
+ return read_csr_num(CSR_PMPADDR4);
+ case 5:
+ return read_csr_num(CSR_PMPADDR5);
+ case 6:
+ return read_csr_num(CSR_PMPADDR6);
+ case 7:
+ return read_csr_num(CSR_PMPADDR7);
+ case 8:
+ return read_csr_num(CSR_PMPADDR8);
+ case 9:
+ return read_csr_num(CSR_PMPADDR9);
+ case 10:
+ return read_csr_num(CSR_PMPADDR10);
+ case 11:
+ return read_csr_num(CSR_PMPADDR11);
+ case 12:
+ return read_csr_num(CSR_PMPADDR12);
+ case 13:
+ return read_csr_num(CSR_PMPADDR13);
+ case 14:
+ return read_csr_num(CSR_PMPADDR14);
+ case 15:
+ return read_csr_num(CSR_PMPADDR15);
+ default:
+ return 0;
+ }
+}
+
+/* Write PMP address register by index (0-15) */
+static void __attribute__((unused)) write_pmpaddr(uint8_t idx, uint32_t val)
+{
+ switch (idx) {
+ case 0:
+ write_csr_num(CSR_PMPADDR0, val);
+ break;
+ case 1:
+ write_csr_num(CSR_PMPADDR1, val);
+ break;
+ case 2:
+ write_csr_num(CSR_PMPADDR2, val);
+ break;
+ case 3:
+ write_csr_num(CSR_PMPADDR3, val);
+ break;
+ case 4:
+ write_csr_num(CSR_PMPADDR4, val);
+ break;
+ case 5:
+ write_csr_num(CSR_PMPADDR5, val);
+ break;
+ case 6:
+ write_csr_num(CSR_PMPADDR6, val);
+ break;
+ case 7:
+ write_csr_num(CSR_PMPADDR7, val);
+ break;
+ case 8:
+ write_csr_num(CSR_PMPADDR8, val);
+ break;
+ case 9:
+ write_csr_num(CSR_PMPADDR9, val);
+ break;
+ case 10:
+ write_csr_num(CSR_PMPADDR10, val);
+ break;
+ case 11:
+ write_csr_num(CSR_PMPADDR11, val);
+ break;
+ case 12:
+ write_csr_num(CSR_PMPADDR12, val);
+ break;
+ case 13:
+ write_csr_num(CSR_PMPADDR13, val);
+ break;
+ case 14:
+ write_csr_num(CSR_PMPADDR14, val);
+ break;
+ case 15:
+ write_csr_num(CSR_PMPADDR15, val);
+ break;
+ }
+}
+
+/* Static Memory Pools for Boot-time PMP Initialization
+ *
+ * Defines kernel memory regions protected at boot. Each pool specifies
+ * a memory range and access permissions.
+ */
+static const mempool_t kernel_mempools[] = {
+ DECLARE_MEMPOOL("kernel_text",
+ &_stext,
+ &_etext,
+ PMPCFG_PERM_RX,
+ PMP_PRIORITY_KERNEL),
+ DECLARE_MEMPOOL("kernel_data",
+ &_sdata,
+ &_edata,
+ PMPCFG_PERM_RW,
+ PMP_PRIORITY_KERNEL),
+ DECLARE_MEMPOOL("kernel_bss",
+ &_sbss,
+ &_ebss,
+ PMPCFG_PERM_RW,
+ PMP_PRIORITY_KERNEL),
+};
+
+#define KERNEL_MEMPOOL_COUNT \
+ (sizeof(kernel_mempools) / sizeof(kernel_mempools[0]))
+
+/* Global PMP configuration (shadow of hardware state) */
+static pmp_config_t pmp_global_config;
+
+/* Helper to compute pmpcfg register index and bit offset for a given region.
+ * Example: region 5 maps to pmpcfg1 (5 / 4) at bit offset 8 ((5 % 4) * 8).
+ */
+static inline void pmp_get_cfg_indices(uint8_t region_idx,
+ uint8_t *cfg_idx,
+ uint8_t *cfg_offset)
+{
+ *cfg_idx = region_idx / 4;
+ *cfg_offset = (region_idx % 4) * 8;
+}
+
+pmp_config_t *pmp_get_config(void)
+{
+ return &pmp_global_config;
+}
+
+int32_t pmp_init(pmp_config_t *config)
+{
+ if (!config)
+ return ERR_PMP_INVALID_REGION;
+
+ /* Clear all PMP regions in hardware and shadow configuration */
+ for (uint8_t i = 0; i < PMP_MAX_REGIONS; i++) {
+ write_pmpaddr(i, 0);
+ if (i % 4 == 0)
+ write_pmpcfg(i / 4, 0);
+
+ config->regions[i].addr_start = 0;
+ config->regions[i].addr_end = 0;
+ config->regions[i].permissions = 0;
+ config->regions[i].priority = PMP_PRIORITY_TEMPORARY;
+ config->regions[i].region_id = i;
+ config->regions[i].locked = 0;
+ }
+
+ config->region_count = 0;
+ config->next_region_idx = 0;
+ config->initialized = 1;
+
+ return ERR_OK;
+}
+
+int32_t pmp_init_pools(pmp_config_t *config,
+ const mempool_t *pools,
+ size_t count)
+{
+ if (!config || !pools || count == 0)
+ return ERR_PMP_INVALID_REGION;
+
+ /* Initialize PMP hardware and state */
+ int32_t ret = pmp_init(config);
+ if (ret < 0)
+ return ret;
+
+ /* Configure each memory pool as a PMP region */
+ for (size_t i = 0; i < count; i++) {
+ const mempool_t *pool = &pools[i];
+
+ /* Validate pool boundaries */
+ if (pool->start >= pool->end)
+ return ERR_PMP_ADDR_RANGE;
+
+ /* Prepare PMP region configuration */
+ pmp_region_t region = {
+ .addr_start = pool->start,
+ .addr_end = pool->end,
+ .permissions = pool->flags & (PMPCFG_R | PMPCFG_W | PMPCFG_X),
+ .priority = pool->tag,
+ .region_id = i,
+ .locked = (pool->tag == PMP_PRIORITY_KERNEL),
+ };
+
+ /* Configure the PMP region */
+ ret = pmp_set_region(config, ®ion);
+ if (ret < 0)
+ return ret;
+ }
+
+ return ERR_OK;
+}
+
+int32_t pmp_init_kernel(pmp_config_t *config)
+{
+ return pmp_init_pools(config, kernel_mempools, KERNEL_MEMPOOL_COUNT);
+}
+
+int32_t pmp_set_region(pmp_config_t *config, const pmp_region_t *region)
+{
+ if (!config || !region)
+ return ERR_PMP_INVALID_REGION;
+
+ /* Validate region index is within bounds */
+ if (region->region_id >= PMP_MAX_REGIONS)
+ return ERR_PMP_INVALID_REGION;
+
+ /* Validate address range */
+ if (region->addr_start >= region->addr_end)
+ return ERR_PMP_ADDR_RANGE;
+
+ /* Check if region is already locked */
+ if (config->regions[region->region_id].locked)
+ return ERR_PMP_LOCKED;
+
+ uint8_t region_idx = region->region_id;
+ uint8_t pmpcfg_idx, pmpcfg_offset;
+ pmp_get_cfg_indices(region_idx, &pmpcfg_idx, &pmpcfg_offset);
+
+ /* Build configuration byte with TOR mode and permissions */
+ uint8_t pmpcfg_perm =
+ region->permissions & (PMPCFG_R | PMPCFG_W | PMPCFG_X);
+ uint8_t pmpcfg_byte = PMPCFG_A_TOR | pmpcfg_perm;
+
+ /* Read current pmpcfg register to preserve other regions */
+ uint32_t pmpcfg_val = read_pmpcfg(pmpcfg_idx);
+
+ /* Clear the configuration byte for this region */
+ pmpcfg_val &= ~(0xFFU << pmpcfg_offset);
+
+ /* Write new configuration byte */
+ pmpcfg_val |= (pmpcfg_byte << pmpcfg_offset);
+
+    /* Write pmpaddr with the upper boundary. In TOR mode the lower bound is
+     * the preceding entry's pmpaddr (or 0 for entry 0), so only the top is
+     * written here; pmpaddr holds a word address, hence the shift by 2.
+     */
+    write_pmpaddr(region_idx, region->addr_end >> 2);
+
+ /* Write pmpcfg register with updated configuration */
+ write_pmpcfg(pmpcfg_idx, pmpcfg_val);
+
+ /* Update shadow configuration */
+ config->regions[region_idx].addr_start = region->addr_start;
+ config->regions[region_idx].addr_end = region->addr_end;
+ config->regions[region_idx].permissions = region->permissions;
+ config->regions[region_idx].priority = region->priority;
+ config->regions[region_idx].region_id = region_idx;
+ config->regions[region_idx].locked = region->locked;
+
+    /* Update region count and next free index for newly used regions */
+    if (region_idx >= config->region_count)
+        config->region_count = region_idx + 1;
+    if (region_idx >= config->next_region_idx)
+        config->next_region_idx = region_idx + 1;
+
+ return ERR_OK;
+}
+
+int32_t pmp_disable_region(pmp_config_t *config, uint8_t region_idx)
+{
+ if (!config)
+ return ERR_PMP_INVALID_REGION;
+
+ /* Validate region index is within bounds */
+ if (region_idx >= PMP_MAX_REGIONS)
+ return ERR_PMP_INVALID_REGION;
+
+ /* Check if region is already locked */
+ if (config->regions[region_idx].locked)
+ return ERR_PMP_LOCKED;
+
+ uint8_t pmpcfg_idx, pmpcfg_offset;
+ pmp_get_cfg_indices(region_idx, &pmpcfg_idx, &pmpcfg_offset);
+
+ /* Read current pmpcfg register to preserve other regions */
+ uint32_t pmpcfg_val = read_pmpcfg(pmpcfg_idx);
+
+ /* Clear the configuration byte for this region (disables it) */
+ pmpcfg_val &= ~(0xFFU << pmpcfg_offset);
+
+ /* Write pmpcfg register with updated configuration */
+ write_pmpcfg(pmpcfg_idx, pmpcfg_val);
+
+ /* Update shadow configuration */
+ config->regions[region_idx].addr_start = 0;
+ config->regions[region_idx].addr_end = 0;
+ config->regions[region_idx].permissions = 0;
+
+ return ERR_OK;
+}
+
+int32_t pmp_lock_region(pmp_config_t *config, uint8_t region_idx)
+{
+ if (!config)
+ return ERR_PMP_INVALID_REGION;
+
+ /* Validate region index is within bounds */
+ if (region_idx >= PMP_MAX_REGIONS)
+ return ERR_PMP_INVALID_REGION;
+
+ uint8_t pmpcfg_idx, pmpcfg_offset;
+ pmp_get_cfg_indices(region_idx, &pmpcfg_idx, &pmpcfg_offset);
+
+ /* Read current pmpcfg register to preserve other regions */
+ uint32_t pmpcfg_val = read_pmpcfg(pmpcfg_idx);
+
+ /* Get current configuration byte for this region */
+ uint8_t pmpcfg_byte = (pmpcfg_val >> pmpcfg_offset) & 0xFFU;
+
+ /* Set lock bit */
+ pmpcfg_byte |= PMPCFG_L;
+
+ /* Clear the configuration byte for this region */
+ pmpcfg_val &= ~(0xFFU << pmpcfg_offset);
+
+ /* Write new configuration byte with lock bit set */
+ pmpcfg_val |= (pmpcfg_byte << pmpcfg_offset);
+
+ /* Write pmpcfg register with updated configuration */
+ write_pmpcfg(pmpcfg_idx, pmpcfg_val);
+
+ /* Update shadow configuration */
+ config->regions[region_idx].locked = 1;
+
+ return ERR_OK;
+}
+
+int32_t pmp_get_region(const pmp_config_t *config,
+ uint8_t region_idx,
+ pmp_region_t *region)
+{
+ if (!config || !region)
+ return ERR_PMP_INVALID_REGION;
+
+ /* Validate region index is within bounds */
+ if (region_idx >= PMP_MAX_REGIONS)
+ return ERR_PMP_INVALID_REGION;
+
+ uint8_t pmpcfg_idx, pmpcfg_offset;
+ pmp_get_cfg_indices(region_idx, &pmpcfg_idx, &pmpcfg_offset);
+
+ /* Read the address and configuration from shadow configuration */
+ region->addr_start = config->regions[region_idx].addr_start;
+ region->addr_end = config->regions[region_idx].addr_end;
+ region->permissions = config->regions[region_idx].permissions;
+ region->priority = config->regions[region_idx].priority;
+ region->region_id = region_idx;
+ region->locked = config->regions[region_idx].locked;
+
+ return ERR_OK;
+}
+
+int32_t pmp_check_access(const pmp_config_t *config,
+ uint32_t addr,
+ uint32_t size,
+ uint8_t is_write,
+ uint8_t is_execute)
+{
+ if (!config)
+ return ERR_PMP_INVALID_REGION;
+
+ uint32_t access_end = addr + size;
+
+ /* In TOR mode, check all regions in priority order */
+ for (uint8_t i = 0; i < config->region_count; i++) {
+ const pmp_region_t *region = &config->regions[i];
+
+ /* Skip disabled regions */
+ if (region->addr_start == 0 && region->addr_end == 0)
+ continue;
+
+ /* Check if access falls within this region */
+ if (addr >= region->addr_start && access_end <= region->addr_end) {
+ /* Verify permissions match access type */
+ uint8_t required_perm = 0;
+ if (is_write)
+ required_perm |= PMPCFG_W;
+ if (is_execute)
+ required_perm |= PMPCFG_X;
+ if (!is_write && !is_execute)
+ required_perm = PMPCFG_R;
+
+ if ((region->permissions & required_perm) == required_perm)
+ return 1; /* Access allowed */
+ else
+ return 0; /* Access denied */
+ }
+ }
+
+ /* Access not covered by any region */
+ return 0;
+}
+
+/* Selects victim flexpage for eviction using priority-based algorithm.
+ *
+ * @mspace : Pointer to memory space
+ * Returns pointer to victim flexpage, or NULL if no evictable page found.
+ */
+static fpage_t *select_victim_fpage(memspace_t *mspace)
+{
+ if (!mspace)
+ return NULL;
+
+ fpage_t *victim = NULL;
+ uint32_t lowest_prio = 0;
+
+ /* Select page with highest priority value (lowest priority).
+ * Kernel regions (priority 0) are never selected. */
+ for (fpage_t *fp = mspace->pmp_first; fp; fp = fp->pmp_next) {
+ if (fp->priority > lowest_prio) {
+ victim = fp;
+ lowest_prio = fp->priority;
+ }
+ }
+
+ return victim;
+}
+
+/* Sets base address for a TOR paired region entry */
+static void pmp_set_base_entry(uint8_t entry_idx, uint32_t base_addr)
+{
+ if (entry_idx >= PMP_MAX_REGIONS)
+ return;
+
+ write_pmpaddr(entry_idx, base_addr >> 2);
+}
+
+/* Loads a flexpage into a PMP hardware region */
+int32_t pmp_load_fpage(fpage_t *fpage, uint8_t region_idx)
+{
+ if (!fpage || region_idx >= PMP_MAX_REGIONS)
+ return -1;
+
+ pmp_config_t *config = pmp_get_config();
+ if (!config)
+ return -1;
+
+ uint32_t base = fpage->base;
+ uint32_t size = fpage->size;
+ uint32_t end = base + size;
+
+ /* User regions use paired entries (base + top), kernel regions use single
+ * entry */
+ if (PMP_IS_USER_REGION(region_idx)) {
+ uint8_t base_entry = PMP_USER_BASE_ENTRY(region_idx);
+ uint8_t top_entry = PMP_USER_TOP_ENTRY(region_idx);
+
+ if (top_entry >= PMP_MAX_REGIONS) {
+ return -1;
+ }
+
+ /* Set base entry (address-only, pmpcfg=0) */
+ pmp_set_base_entry(base_entry, base);
+ config->regions[base_entry].addr_start = base;
+ config->regions[base_entry].addr_end = base;
+ config->regions[base_entry].permissions = 0;
+ config->regions[base_entry].locked = 0;
+
+ /* Set top entry (TOR mode with permissions) */
+ pmp_region_t top_region = {
+ .addr_start = base,
+ .addr_end = end,
+ .permissions = fpage->rwx,
+ .priority = fpage->priority,
+ .region_id = top_entry,
+ .locked = 0,
+ };
+
+ int32_t ret = pmp_set_region(config, &top_region);
+ if (ret < 0)
+ return ret;
+
+ fpage->pmp_id = base_entry;
+
+ } else {
+ /* Kernel region: single entry TOR mode */
+ pmp_region_t region = {
+ .addr_start = base,
+ .addr_end = end,
+ .permissions = fpage->rwx,
+ .priority = fpage->priority,
+ .region_id = region_idx,
+ .locked = 0,
+ };
+
+ int32_t ret = pmp_set_region(config, ®ion);
+ if (ret < 0)
+ return ret;
+
+ fpage->pmp_id = region_idx;
+ }
+
+ return 0;
+}
+
+/* Evicts a flexpage from its PMP hardware region */
+int32_t pmp_evict_fpage(fpage_t *fpage)
+{
+ if (!fpage)
+ return -1;
+
+ /* Only evict if actually loaded into PMP */
+ if (fpage->pmp_id == PMP_INVALID_REGION)
+ return 0;
+
+ pmp_config_t *config = pmp_get_config();
+ if (!config)
+ return -1;
+
+ uint8_t region_idx = fpage->pmp_id;
+
+ /* User regions need to clear both base and top entries */
+ if (PMP_IS_USER_REGION(region_idx)) {
+ uint8_t base_entry = PMP_USER_BASE_ENTRY(region_idx);
+ uint8_t top_entry = PMP_USER_TOP_ENTRY(region_idx);
+
+ /* Clear base entry (address and shadow config) */
+ pmp_set_base_entry(base_entry, 0);
+ config->regions[base_entry].addr_start = 0;
+ config->regions[base_entry].addr_end = 0;
+ config->regions[base_entry].permissions = 0;
+
+ /* Clear top entry using existing pmp_disable_region() */
+ int32_t ret = pmp_disable_region(config, top_entry);
+ if (ret < 0)
+ return ret;
+
+ } else {
+ /* Kernel region uses simple single-entry eviction */
+ int32_t ret = pmp_disable_region(config, region_idx);
+ if (ret < 0)
+ return ret;
+ }
+
+ fpage->pmp_id = PMP_INVALID_REGION;
+ return 0;
+}
+
+/* Handles PMP access faults by loading the required flexpage into hardware. */
+int32_t pmp_handle_access_fault(uint32_t fault_addr, uint8_t is_write)
+{
+ if (!kcb || !kcb->task_current || !kcb->task_current->data)
+ return PMP_FAULT_UNHANDLED;
+
+ tcb_t *current = (tcb_t *) kcb->task_current->data;
+ memspace_t *mspace = current->mspace;
+ if (!mspace)
+ return PMP_FAULT_UNHANDLED;
+
+ /* Find flexpage containing faulting address */
+ fpage_t *target_fpage = NULL;
+ for (fpage_t *fp = mspace->first; fp; fp = fp->as_next) {
+ if (fault_addr >= fp->base && fault_addr < (fp->base + fp->size)) {
+ target_fpage = fp;
+ break;
+ }
+ }
+
+ /* Cannot recover: address not in task's memory space or already loaded */
+ if (!target_fpage || target_fpage->pmp_id != PMP_INVALID_REGION) {
+ /* Mark task as zombie for deferred cleanup */
+ current->state = TASK_ZOMBIE;
+ return PMP_FAULT_TERMINATE;
+ }
+
+ pmp_config_t *config = pmp_get_config();
+ if (!config)
+ return PMP_FAULT_UNHANDLED;
+
+ /* Load into available region or evict victim */
+ if (config->next_region_idx < PMP_MAX_REGIONS)
+ return pmp_load_fpage(target_fpage, config->next_region_idx);
+
+ fpage_t *victim = select_victim_fpage(mspace);
+ if (!victim)
+ return PMP_FAULT_UNHANDLED;
+
+    /* Record the victim's hardware slot first: eviction resets its pmp_id */
+    uint8_t victim_region = victim->pmp_id;
+    int32_t ret = pmp_evict_fpage(victim);
+    return (ret == 0) ? pmp_load_fpage(target_fpage, victim_region) : ret;
+}
+
+/* Finds next available PMP region slot
+ *
+ * User regions require two consecutive free entries.
+ * Kernel regions require single entry.
+ *
+ * Returns region index on success, -1 if none available.
+ */
+static int8_t find_free_region_slot(const pmp_config_t *config)
+{
+ if (!config)
+ return -1;
+
+ for (uint8_t i = 0; i < PMP_MAX_REGIONS; i++) {
+ /* Skip locked regions */
+ if (config->regions[i].locked)
+ continue;
+
+ bool is_free = (config->regions[i].addr_start == 0 &&
+ config->regions[i].addr_end == 0);
+
+ if (!is_free)
+ continue;
+
+ /* Kernel regions use single entry */
+ if (i < PMP_USER_REGION_START)
+ return i;
+
+ /* User regions need two consecutive slots */
+ if (i + 1 < PMP_MAX_REGIONS) {
+ bool next_is_free = (config->regions[i + 1].addr_start == 0 &&
+ config->regions[i + 1].addr_end == 0);
+ bool next_not_locked = !config->regions[i + 1].locked;
+
+ if (next_is_free && next_not_locked)
+ return i;
+ }
+ }
+
+ return -1;
+}
+
+int32_t pmp_switch_context(memspace_t *old_mspace, memspace_t *new_mspace)
+{
+ if (old_mspace == new_mspace) {
+ return 0;
+ }
+
+ pmp_config_t *config = pmp_get_config();
+ if (!config) {
+ return -1;
+ }
+
+ /* Evict old task's dynamic regions */
+ if (old_mspace) {
+ for (fpage_t *fp = old_mspace->pmp_first; fp; fp = fp->pmp_next) {
+ /* pmp_evict_fpage correctly handles paired entries */
+ if (fp->pmp_id != PMP_INVALID_REGION) {
+ pmp_evict_fpage(fp);
+ }
+ }
+ }
+
+ /* Load new task's regions and rebuild tracking list */
+ if (new_mspace) {
+ new_mspace->pmp_first = NULL;
+
+ for (fpage_t *fp = new_mspace->first; fp; fp = fp->as_next) {
+ /* Shared regions may already be loaded */
+ if (fp->pmp_id != PMP_INVALID_REGION) {
+ fp->pmp_next = new_mspace->pmp_first;
+ new_mspace->pmp_first = fp;
+ continue;
+ }
+
+ int32_t region_idx = find_free_region_slot(config);
+ if (region_idx < 0)
+ break;
+
+ if (pmp_load_fpage(fp, (uint8_t) region_idx) != 0)
+ break;
+
+ fp->pmp_next = new_mspace->pmp_first;
+ new_mspace->pmp_first = fp;
+ }
+ }
+
+ return 0;
+}
diff --git a/arch/riscv/pmp.h b/arch/riscv/pmp.h
new file mode 100644
index 00000000..89c066f0
--- /dev/null
+++ b/arch/riscv/pmp.h
@@ -0,0 +1,186 @@
+/* RISC-V Physical Memory Protection (PMP) Hardware Layer
+ *
+ * Low-level interface to RISC-V PMP using TOR (Top-of-Range) mode for
+ * flexible region management without alignment constraints.
+ */
+
+#pragma once
+
+#include
+#include
+#include
+
+#include "csr.h"
+
+/* PMP Region Priority Levels (lower value = higher priority)
+ *
+ * Used for eviction decisions when hardware PMP regions are exhausted.
+ */
+typedef enum {
+ PMP_PRIORITY_KERNEL = 0,
+ PMP_PRIORITY_STACK = 1,
+ PMP_PRIORITY_SHARED = 2,
+ PMP_PRIORITY_TEMPORARY = 3,
+ PMP_PRIORITY_COUNT = 4
+} pmp_priority_t;
+
+/* PMP TOR Mode Entry Layout
+ *
+ * Kernel regions (0-2) use single entries configured at boot.
+ * User dynamic regions (3+) use paired entries for flexible boundaries:
+ * - Base entry: Lower bound address
+ * - Top entry: Upper bound address with permissions
+ * Paired entries allow arbitrary region boundaries without NAPOT alignment.
+ */
+#define PMP_KERNEL_REGIONS 3 /* Regions 0-2 for kernel */
+#define PMP_USER_REGION_START 3 /* User regions start from 3 */
+#define PMP_ENTRIES_PER_USER 2 /* Each user region uses 2 entries */
+#define PMP_MAX_USER_REGIONS \
+ ((PMP_MAX_REGIONS - PMP_USER_REGION_START) / PMP_ENTRIES_PER_USER)
+
+/* Invalid region marker (fpage not loaded into any PMP region) */
+#define PMP_INVALID_REGION 0xFF
+
+/* Check if a region index is a user region requiring paired entries */
+#define PMP_IS_USER_REGION(idx) ((idx) >= PMP_USER_REGION_START)
+
+/* Convert user region index to hardware entry pair */
+#define PMP_USER_BASE_ENTRY(idx) (idx)
+#define PMP_USER_TOP_ENTRY(idx) ((idx) + 1)
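+
+/* Example: user region index 3 pairs pmpaddr3 (base, config byte zero) with
+ * pmpaddr4 (top, TOR mode plus permissions); pmp_load_fpage() programs both.
+ */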
+
+/* PMP Region Configuration */
+typedef struct {
+ uint32_t addr_start; /* Start address (inclusive) */
+ uint32_t addr_end; /* End address (exclusive, written to pmpaddr) */
+ uint8_t permissions; /* R/W/X bits (PMPCFG_R | PMPCFG_W | PMPCFG_X) */
+ pmp_priority_t priority; /* Eviction priority */
+ uint8_t region_id; /* Hardware region index (0-15) */
+ uint8_t locked; /* Lock bit (cannot modify until reset) */
+} pmp_region_t;
+
+/* PMP Global State */
+typedef struct {
+ pmp_region_t regions[PMP_MAX_REGIONS]; /* Shadow of hardware config */
+ uint8_t region_count; /* Active region count */
+ uint8_t next_region_idx; /* Next free region index */
+ uint32_t initialized; /* Initialization flag */
+} pmp_config_t;
+
+/* PMP Management Functions */
+
+/* Returns pointer to global PMP configuration */
+pmp_config_t *pmp_get_config(void);
+
+/* Initializes the PMP hardware and configuration state.
+ * @config : Pointer to pmp_config_t structure to be initialized.
+ * Returns 0 on success, or negative error code on failure.
+ */
+int32_t pmp_init(pmp_config_t *config);
+
+/* Configures a single PMP region in TOR mode.
+ * @config : Pointer to PMP configuration state
+ * @region : Pointer to pmp_region_t structure with desired configuration
+ * Returns 0 on success, or negative error code on failure.
+ */
+int32_t pmp_set_region(pmp_config_t *config, const pmp_region_t *region);
+
+/* Reads the current configuration of a PMP region.
+ * @config : Pointer to PMP configuration state
+ * @region_idx : Index of the region to read (0-15)
+ * @region : Pointer to pmp_region_t to store the result
+ * Returns 0 on success, or negative error code on failure.
+ */
+int32_t pmp_get_region(const pmp_config_t *config,
+ uint8_t region_idx,
+ pmp_region_t *region);
+
+/* Disables a PMP region.
+ * @config : Pointer to PMP configuration state
+ * @region_idx : Index of the region to disable (0-15)
+ * Returns 0 on success, or negative error code on failure.
+ */
+int32_t pmp_disable_region(pmp_config_t *config, uint8_t region_idx);
+
+/* Locks a PMP region to prevent further modification.
+ * @config : Pointer to PMP configuration state
+ * @region_idx : Index of the region to lock (0-15)
+ * Returns 0 on success, or negative error code on failure.
+ */
+int32_t pmp_lock_region(pmp_config_t *config, uint8_t region_idx);
+
+/* Verifies that a memory access is allowed by the current PMP configuration.
+ * @config : Pointer to PMP configuration state
+ * @addr : Address to check
+ * @size : Size of the access in bytes
+ * @is_write : 1 for write access, 0 for read access
+ * @is_execute : 1 for execute access, 0 for data access
+ * Returns 1 if access is allowed, 0 if denied, or negative error code.
+ */
+int32_t pmp_check_access(const pmp_config_t *config,
+ uint32_t addr,
+ uint32_t size,
+ uint8_t is_write,
+ uint8_t is_execute);
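+
+/* Usage sketch: check a 4-byte data read against the current configuration:
+ *   if (pmp_check_access(pmp_get_config(), addr, 4, 0, 0) != 1)
+ *       ...reject the access...
+ */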
+
+/* Memory Pool Management Functions */
+
+/* Initializes PMP regions from an array of memory pool descriptors.
+ * @config : Pointer to PMP configuration state
+ * @pools : Array of memory pool descriptors
+ * @count : Number of pools in the array
+ * Returns 0 on success, or negative error code on failure.
+ */
+int32_t pmp_init_pools(pmp_config_t *config,
+ const mempool_t *pools,
+ size_t count);
+
+/* Initializes PMP with default kernel memory pools.
+ * @config : Pointer to PMP configuration state
+ * Returns 0 on success, or negative error code on failure.
+ */
+int32_t pmp_init_kernel(pmp_config_t *config);
+
+/* Flexpage Hardware Loading Functions */
+
+/* Loads a flexpage into a PMP hardware region.
+ * @fpage : Pointer to flexpage to load
+ * @region_idx : Hardware PMP region index (0-15)
+ * Returns 0 on success, or negative error code on failure.
+ */
+int32_t pmp_load_fpage(fpage_t *fpage, uint8_t region_idx);
+
+/* Evicts a flexpage from its PMP hardware region.
+ * @fpage : Pointer to flexpage to evict
+ * Returns 0 on success, or negative error code on failure.
+ */
+int32_t pmp_evict_fpage(fpage_t *fpage);
+
+/* PMP Fault Handler Return Codes */
+#define PMP_FAULT_RECOVERED 0 /* Fault recovered, resume execution */
+#define PMP_FAULT_UNHANDLED (-1) /* Cannot recover, fall through to default */
+#define PMP_FAULT_TERMINATE \
+ (-2) /* Task terminated, caller invokes dispatcher */
+
+/* Handles PMP access violations.
+ *
+ * Attempts to recover from PMP access faults by loading the required memory
+ * region into a hardware PMP region. If all 16 regions are in use, selects a
+ * victim for eviction and reuses its region.
+ *
+ * @fault_addr : The faulting memory address (from mtval CSR)
+ * @is_write : 1 for store/AMO access, 0 for load
+ * Returns PMP_FAULT_RECOVERED, PMP_FAULT_UNHANDLED, or PMP_FAULT_TERMINATE.
+ */
+int32_t pmp_handle_access_fault(uint32_t fault_addr, uint8_t is_write);
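+
+/* The trap handler consumes these results roughly as follows (see do_trap):
+ *   PMP_FAULT_RECOVERED -> return to and retry the faulting instruction
+ *   PMP_FAULT_TERMINATE -> task is a zombie; dispatcher picks the next task
+ *   PMP_FAULT_UNHANDLED -> fall through to the default exception path
+ */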
+
+/* Switches PMP configuration during task context switch.
+ *
+ * Evicts the old task's dynamic regions from hardware and loads the new
+ * task's regions into available PMP slots. Kernel regions marked as locked
+ * are preserved across all context switches.
+ *
+ * @old_mspace : Memory space of task being switched out (can be NULL)
+ * @new_mspace : Memory space of task being switched in (can be NULL)
+ * Returns 0 on success, negative error code on failure.
+ */
+int32_t pmp_switch_context(memspace_t *old_mspace, memspace_t *new_mspace);
diff --git a/include/linmo.h b/include/linmo.h
index e4cbecfc..d52cf8ed 100644
--- a/include/linmo.h
+++ b/include/linmo.h
@@ -7,6 +7,7 @@
#include
#include
+#include
#include
#include
#include
diff --git a/include/private/error.h b/include/private/error.h
index 5589087b..33f5d113 100644
--- a/include/private/error.h
+++ b/include/private/error.h
@@ -29,6 +29,14 @@ enum {
ERR_STACK_CHECK, /* Stack overflow or corruption detected */
ERR_HEAP_CORRUPT, /* Heap corruption or invalid free detected */
+ /* PMP Configuration Errors */
+ ERR_PMP_INVALID_REGION, /* Invalid PMP region parameters */
+ ERR_PMP_NO_REGIONS, /* No free PMP regions available */
+ ERR_PMP_LOCKED, /* Region is locked by higher priority */
+ ERR_PMP_SIZE_MISMATCH, /* Size doesn't meet alignment requirements */
+ ERR_PMP_ADDR_RANGE, /* Address range is invalid */
+ ERR_PMP_NOT_INIT, /* PMP not initialized */
+
/* IPC and Synchronization Errors */
ERR_PIPE_ALLOC, /* Pipe allocation failed */
ERR_PIPE_DEALLOC, /* Pipe deallocation failed */
diff --git a/include/sys/memprot.h b/include/sys/memprot.h
new file mode 100644
index 00000000..ff202ba8
--- /dev/null
+++ b/include/sys/memprot.h
@@ -0,0 +1,134 @@
+/* Memory Protection Abstractions
+ *
+ * Software abstractions for managing memory protection at different
+ * granularities. These structures build upon hardware protection
+ * mechanisms (such as RISC-V PMP) to provide flexible, architecture-
+ * independent memory isolation.
+ */
+
+#pragma once
+
+#include
+
+/* Forward declarations */
+struct fpage;
+struct as;
+
+/* Flexpage
+ *
+ * Contiguous physical memory region with hardware-enforced protection.
+ * Supports arbitrary base addresses and sizes without alignment constraints.
+ */
+typedef struct fpage {
+ struct fpage *as_next; /* Next in address space list */
+ struct fpage *map_next; /* Next in mapping chain */
+ struct fpage *pmp_next; /* Next in PMP queue */
+ uint32_t base; /* Physical base address */
+ uint32_t size; /* Region size */
+ uint32_t rwx; /* R/W/X permission bits */
+    uint32_t pmp_id;       /* PMP index; PMP_INVALID_REGION if not loaded */
+ uint32_t flags; /* Status flags */
+ uint32_t priority; /* Eviction priority */
+ int used; /* Usage counter */
+} fpage_t;
+
+/* Memory Space
+ *
+ * Collection of flexpages forming a task's memory view. Can be shared
+ * across multiple tasks.
+ */
+typedef struct memspace {
+ uint32_t as_id; /* Memory space identifier */
+ struct fpage *first; /* Head of flexpage list */
+ struct fpage *pmp_first; /* Head of PMP-loaded list */
+ struct fpage *pmp_stack; /* Stack regions */
+ uint32_t shared; /* Shared flag */
+} memspace_t;
+
+/* Memory Pool
+ *
+ * Static memory region descriptor for boot-time PMP initialization.
+ */
+typedef struct {
+ const char *name; /* Pool name */
+ uintptr_t start; /* Start address */
+ uintptr_t end; /* End address */
+ uint32_t flags; /* Access permissions */
+ uint32_t tag; /* Pool type/priority */
+} mempool_t;
+
+/* Memory Pool Declaration Helpers
+ *
+ * Simplifies memory pool initialization with designated initializers.
+ * DECLARE_MEMPOOL_FROM_SYMBOLS uses token concatenation to construct
+ * linker symbol names automatically.
+ */
+#define DECLARE_MEMPOOL(name_, start_, end_, flags_, tag_) \
+ { \
+ .name = (name_), .start = (uintptr_t) (start_), \
+ .end = (uintptr_t) (end_), .flags = (flags_), .tag = (tag_), \
+ }
+
+#define DECLARE_MEMPOOL_FROM_SYMBOLS(name_, sym_base_, flags_, tag_) \
+ DECLARE_MEMPOOL((name_), &(sym_base_##_start), &(sym_base_##_end), \
+ (flags_), (tag_))
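+
+/* Example (mirrors the kernel pools in arch/riscv/pmp.c):
+ *   static const mempool_t pools[] = {
+ *       DECLARE_MEMPOOL("kernel_text", &_stext, &_etext, PMPCFG_PERM_RX,
+ *                       PMP_PRIORITY_KERNEL),
+ *   };
+ */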
+
+/* Flexpage Management Functions */
+
+/* Creates and initializes a new flexpage.
+ * @base : Physical base address
+ * @size : Size in bytes
+ * @rwx : Permission bits
+ * @priority : Eviction priority
+ * Returns pointer to created flexpage, or NULL on failure.
+ */
+fpage_t *mo_fpage_create(uint32_t base,
+ uint32_t size,
+ uint32_t rwx,
+ uint32_t priority);
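+
+/* Example: wrap a task stack as a read/write flexpage, as kernel/task.c does:
+ *   fpage_t *fp = mo_fpage_create((uint32_t) stack_base, stack_size,
+ *                                 PMPCFG_R | PMPCFG_W, PMP_PRIORITY_STACK);
+ */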
+
+/* Destroys a flexpage.
+ * @fpage : Pointer to flexpage to destroy
+ */
+void mo_fpage_destroy(fpage_t *fpage);
+
+/* Memory Space Management Functions */
+
+/* Creates and initializes a memory space.
+ * @as_id : Memory space identifier
+ * @shared : Whether this space can be shared across tasks
+ * Returns pointer to created memory space, or NULL on failure.
+ */
+memspace_t *mo_memspace_create(uint32_t as_id, uint32_t shared);
+
+/* Destroys a memory space and all its flexpages.
+ * @mspace : Pointer to memory space to destroy
+ */
+void mo_memspace_destroy(memspace_t *mspace);
+
+/* Flexpage Hardware Loading Functions */
+
+/* Loads a flexpage into a hardware region.
+ * @fpage : Pointer to flexpage to load
+ * @region_idx : Hardware region index (0-15)
+ * Returns 0 on success, or negative error code on failure.
+ */
+int32_t mo_load_fpage(fpage_t *fpage, uint8_t region_idx);
+
+/* Evicts a flexpage from its hardware region.
+ * @fpage : Pointer to flexpage to evict
+ * Returns 0 on success, or negative error code on failure.
+ */
+int32_t mo_evict_fpage(fpage_t *fpage);
+
+/* Handles memory access faults.
+ *
+ * Attempts to recover from access faults by loading the required memory
+ * region into a hardware region. If all regions are in use, selects a
+ * victim for eviction and reuses its region.
+ *
+ * @fault_addr : The faulting memory address
+ * @is_write : 1 for write access, 0 for read access
+ * Returns 0 on successful recovery, negative error code on failure.
+ */
+int32_t mo_handle_access_fault(uint32_t fault_addr, uint8_t is_write);
diff --git a/include/sys/task.h b/include/sys/task.h
index ccf5f4fa..27a433d5 100644
--- a/include/sys/task.h
+++ b/include/sys/task.h
@@ -37,11 +37,12 @@ enum task_priorities {
/* Task Lifecycle States */
enum task_states {
- TASK_STOPPED, /* Task created but not yet scheduled */
- TASK_READY, /* Task in ready state, waiting to be scheduled */
- TASK_RUNNING, /* Task currently executing on CPU */
- TASK_BLOCKED, /* Task waiting for delay timer to expire */
- TASK_SUSPENDED /* Task paused/excluded from scheduling until resumed */
+ TASK_STOPPED, /* Task created but not yet scheduled */
+ TASK_READY, /* Task in ready state, waiting to be scheduled */
+ TASK_RUNNING, /* Task currently executing on CPU */
+ TASK_BLOCKED, /* Task waiting for delay timer to expire */
+ TASK_SUSPENDED, /* Task paused/excluded from scheduling until resumed */
+ TASK_ZOMBIE /* Task terminated, awaiting resource cleanup */
};
/* Priority Level Constants for Priority-Aware Time Slicing */
@@ -59,6 +60,9 @@ enum task_states {
#define TASK_TIMESLICE_LOW 10 /* Low priority: longer slice */
#define TASK_TIMESLICE_IDLE 15 /* Idle tasks: longest slice */
+/* Forward declaration */
+struct memspace;
+
/* Task Control Block (TCB)
*
* Contains all essential information about a single task, including saved
@@ -72,6 +76,13 @@ typedef struct tcb {
size_t stack_sz; /* Total size of the stack in bytes */
void (*entry)(void); /* Task's entry point function */
+ /* Kernel Stack for U-mode Tasks */
+ void *kernel_stack; /* Base address of kernel stack (NULL for M-mode) */
+ size_t kernel_stack_size; /* Size of kernel stack in bytes (0 for M-mode) */
+
+ /* Memory Protection */
+ struct memspace *mspace; /* Memory space for task isolation */
+
/* Scheduling Parameters */
uint16_t prio; /* Encoded priority (base and time slice counter) */
uint8_t prio_level; /* Priority level (0-7, 0 = highest) */
diff --git a/kernel/main.c b/kernel/main.c
index 0015dca7..b7eaa2e4 100644
--- a/kernel/main.c
+++ b/kernel/main.c
@@ -72,6 +72,11 @@ int32_t main(void)
*/
scheduler_started = true;
+ /* Initialize kernel stack for first task */
+ if (kcb->preemptive)
+ hal_set_kernel_stack(first_task->kernel_stack,
+ first_task->kernel_stack_size);
+
/* In preemptive mode, tasks are managed via ISR frames (sp).
* In cooperative mode, tasks are managed via jmp_buf (context).
*/
diff --git a/kernel/memprot.c b/kernel/memprot.c
new file mode 100644
index 00000000..cbbd51bb
--- /dev/null
+++ b/kernel/memprot.c
@@ -0,0 +1,99 @@
+/* Memory Protection Management
+ *
+ * Provides allocation and management functions for flexpages, which are
+ * software abstractions representing contiguous physical memory regions with
+ * hardware-enforced protection attributes.
+ */
+
+#include
+#include
+#include
+#include
+
+/* Creates and initializes a flexpage */
+fpage_t *mo_fpage_create(uint32_t base,
+ uint32_t size,
+ uint32_t rwx,
+ uint32_t priority)
+{
+ fpage_t *fpage = malloc(sizeof(fpage_t));
+ if (!fpage)
+ return NULL;
+
+ /* Initialize all fields */
+ fpage->as_next = NULL;
+ fpage->map_next = NULL;
+ fpage->pmp_next = NULL;
+ fpage->base = base;
+ fpage->size = size;
+ fpage->rwx = rwx;
+ fpage->pmp_id = PMP_INVALID_REGION; /* Not loaded into PMP initially */
+ fpage->flags = 0; /* No flags set initially */
+ fpage->priority = priority;
+ fpage->used = 0; /* Not in use initially */
+
+ return fpage;
+}
+
+/* Destroys a flexpage */
+void mo_fpage_destroy(fpage_t *fpage)
+{
+ if (!fpage)
+ return;
+
+ free(fpage);
+}
+
+/* Loads a flexpage into a hardware region */
+int32_t mo_load_fpage(fpage_t *fpage, uint8_t region_idx)
+{
+ return pmp_load_fpage(fpage, region_idx);
+}
+
+/* Evicts a flexpage from its hardware region */
+int32_t mo_evict_fpage(fpage_t *fpage)
+{
+ return pmp_evict_fpage(fpage);
+}
+
+/* Handles memory access faults */
+int32_t mo_handle_access_fault(uint32_t fault_addr, uint8_t is_write)
+{
+ return pmp_handle_access_fault(fault_addr, is_write);
+}
+
+/* Creates and initializes a memory space */
+memspace_t *mo_memspace_create(uint32_t as_id, uint32_t shared)
+{
+ memspace_t *mspace = malloc(sizeof(memspace_t));
+ if (!mspace)
+ return NULL;
+
+ mspace->as_id = as_id;
+ mspace->first = NULL;
+ mspace->pmp_first = NULL;
+ mspace->pmp_stack = NULL;
+ mspace->shared = shared;
+
+ return mspace;
+}
+
+/* Destroys a memory space and all its flexpages */
+void mo_memspace_destroy(memspace_t *mspace)
+{
+ if (!mspace)
+ return;
+
+ /* Evict and free all flexpages in the list */
+ fpage_t *fp = mspace->first;
+ while (fp) {
+ fpage_t *next = fp->as_next;
+ /* Evict from PMP hardware before freeing to prevent stale references */
+ if (fp->pmp_id != PMP_INVALID_REGION)
+ pmp_evict_fpage(fp);
+ mo_fpage_destroy(fp);
+ fp = next;
+ }
+
+ free(mspace);
+}
diff --git a/kernel/syscall.c b/kernel/syscall.c
index 7be66632..f7bfc864 100644
--- a/kernel/syscall.c
+++ b/kernel/syscall.c
@@ -384,16 +384,25 @@ int sys_uptime(void)
}
/* User mode safe output syscall.
- * Outputs a string from user mode by executing puts() in kernel context.
- * This avoids privilege violations from printf's logger mutex operations.
+ * Outputs a string from user mode directly via UART, bypassing the logger
+ * queue. Direct output ensures strict ordering for U-mode tasks and avoids race
+ * conditions with the async logger task.
*/
static int _tputs(const char *str)
{
if (unlikely(!str))
return -EINVAL;
- /* Use puts() which will handle logger enqueue or direct output */
- return puts(str);
+ /* Prevent task switching during output to avoid character interleaving.
+ * Ensures the entire string is output atomically with respect to other
+ * tasks.
+ */
+ NOSCHED_ENTER();
+ for (const char *p = str; *p; p++)
+ _putchar(*p);
+ NOSCHED_LEAVE();
+
+ return 0;
}
int sys_tputs(const char *str)
diff --git a/kernel/task.c b/kernel/task.c
index c9973e19..b363606a 100644
--- a/kernel/task.c
+++ b/kernel/task.c
@@ -8,6 +8,7 @@
#include
#include
#include
+#include
#include
#include "private/error.h"
@@ -44,6 +45,9 @@ static volatile uint32_t timer_work_generation = 0; /* counter for coalescing */
#define TIMER_WORK_DELAY_UPDATE (1U << 1) /* Task delay processing */
#define TIMER_WORK_CRITICAL (1U << 2) /* High-priority timer work */
+/* Per-task kernel stack size for U-mode tasks. Must be large enough to hold
+ * one ISR frame plus the call frames of the C trap handler.
+ */
+#define KERNEL_STACK_SIZE 512
+
#if CONFIG_STACK_PROTECTION
/* Stack canary checking frequency - check every N context switches */
#define STACK_CHECK_INTERVAL 32
@@ -347,6 +351,46 @@ void yield(void);
void _dispatch(void) __attribute__((weak, alias("dispatch")));
void _yield(void) __attribute__((weak, alias("yield")));
+/* Zombie Task Cleanup
+ *
+ * Scans the task list for terminated (zombie) tasks and frees their resources.
+ * Called from dispatcher to ensure cleanup happens in a safe context.
+ */
+static void task_cleanup_zombies(void)
+{
+ if (!kcb || !kcb->tasks)
+ return;
+
+ list_node_t *node = list_next(kcb->tasks->head);
+ while (node && node != kcb->tasks->tail) {
+ list_node_t *next = list_next(node);
+ tcb_t *tcb = node->data;
+
+ if (tcb && tcb->state == TASK_ZOMBIE) {
+ /* Remove from task list */
+ list_remove(kcb->tasks, node);
+ kcb->task_count--;
+
+ /* Clear from lookup cache */
+ for (int i = 0; i < TASK_CACHE_SIZE; i++) {
+ if (task_cache[i].task == tcb) {
+ task_cache[i].id = 0;
+ task_cache[i].task = NULL;
+ }
+ }
+
+ /* Free all resources */
+ if (tcb->mspace)
+ mo_memspace_destroy(tcb->mspace);
+ free(tcb->stack);
+ if (tcb->kernel_stack)
+ free(tcb->kernel_stack);
+ free(tcb);
+ }
+ node = next;
+ }
+}
+
/* Round-Robin Scheduler Implementation
*
* Implements an efficient round-robin scheduler tweaked for small systems.
@@ -527,6 +571,9 @@ void dispatch(void)
if (unlikely(!kcb || !kcb->task_current || !kcb->task_current->data))
panic(ERR_NO_TASKS);
+ /* Clean up any terminated (zombie) tasks */
+ task_cleanup_zombies();
+
/* Save current context - only needed for cooperative mode.
* In preemptive mode, ISR already saved context to stack,
* so we skip this step to avoid interference.
@@ -616,6 +663,9 @@ void dispatch(void)
next_task->state = TASK_RUNNING;
next_task->time_slice = get_priority_timeslice(next_task->prio_level);
+ /* Switch PMP configuration if tasks have different memory spaces */
+ pmp_switch_context(prev_task->mspace, next_task->mspace);
+
/* Perform context switch based on scheduling mode */
if (kcb->preemptive) {
/* Same task - no context switch needed */
@@ -628,6 +678,10 @@ void dispatch(void)
* When we return, ISR will restore from next_task's stack.
*/
hal_switch_stack(&prev_task->sp, next_task->sp);
+
+ /* Update kernel stack for next trap entry */
+ hal_set_kernel_stack(next_task->kernel_stack,
+ next_task->kernel_stack_size);
} else {
/* Cooperative mode: Always call hal_context_restore() because it uses
* setjmp/longjmp mechanism. Even if same task continues, we must
@@ -652,15 +706,16 @@ void yield(void)
* READY again.
*/
if (kcb->preemptive) {
- /* Trigger one dispatcher call - this will context switch to another
- * task. When we return here (after being rescheduled), our delay will
- * have expired.
+ /* Avoid triggering nested traps when already in trap context.
+ * The dispatcher can be invoked directly since the trap handler
+ * environment is already established.
*/
- __asm__ volatile("ecall");
+ if (trap_nesting_depth > 0) {
+ dispatcher(0);
+ } else {
+ __asm__ volatile("ecall");
+ }
- /* After ecall returns, we've been context-switched back, meaning we're
- * READY. No need to check state - if we're executing, we're ready.
- */
return;
}
@@ -675,7 +730,15 @@ void yield(void)
/* In cooperative mode, delays are only processed on an explicit yield. */
list_foreach(kcb->tasks, delay_update, NULL);
+ /* Save current task before scheduler modifies task_current */
+ tcb_t *prev_task = (tcb_t *) kcb->task_current->data;
+
sched_select_next_task(); /* Use O(1) priority scheduler */
+
+ /* Switch PMP configuration if tasks have different memory spaces */
+ tcb_t *next_task = (tcb_t *) kcb->task_current->data;
+ pmp_switch_context(prev_task->mspace, next_task->mspace);
+
hal_context_restore(((tcb_t *) kcb->task_current->data)->context, 1);
}
@@ -748,6 +811,36 @@ static int32_t task_spawn_impl(void *task_entry,
panic(ERR_STACK_ALLOC);
}
+ /* Create memory space for U-mode tasks only.
+ * M-mode tasks do not require PMP memory protection.
+ */
+ if (user_mode) {
+ tcb->mspace = mo_memspace_create(kcb->next_tid, 0);
+ if (!tcb->mspace) {
+ free(tcb->stack);
+ free(tcb);
+ panic(ERR_TCB_ALLOC);
+ }
+
+ /* Register stack as flexpage */
+ fpage_t *stack_fpage =
+ mo_fpage_create((uint32_t) tcb->stack, new_stack_size,
+ PMPCFG_R | PMPCFG_W, PMP_PRIORITY_STACK);
+ if (!stack_fpage) {
+ mo_memspace_destroy(tcb->mspace);
+ free(tcb->stack);
+ free(tcb);
+ panic(ERR_TCB_ALLOC);
+ }
+
+ /* Add stack to memory space */
+ stack_fpage->as_next = tcb->mspace->first;
+ tcb->mspace->first = stack_fpage;
+ tcb->mspace->pmp_stack = stack_fpage;
+ } else {
+ tcb->mspace = NULL;
+ }
+
/* Minimize critical section duration */
CRITICAL_ENTER();
@@ -778,15 +871,37 @@ static int32_t task_spawn_impl(void *task_entry,
CRITICAL_LEAVE();
+ /* Allocate per-task kernel stack for U-mode tasks */
+ if (user_mode) {
+ tcb->kernel_stack = malloc(KERNEL_STACK_SIZE);
+ if (!tcb->kernel_stack) {
+ CRITICAL_ENTER();
+ list_remove(kcb->tasks, node);
+ kcb->task_count--;
+ CRITICAL_LEAVE();
+ free(tcb->stack);
+ free(tcb);
+ panic(ERR_STACK_ALLOC);
+ }
+ tcb->kernel_stack_size = KERNEL_STACK_SIZE;
+ } else {
+ tcb->kernel_stack = NULL;
+ tcb->kernel_stack_size = 0;
+ }
+
/* Initialize execution context outside critical section. */
hal_context_init(&tcb->context, (size_t) tcb->stack, new_stack_size,
(size_t) task_entry, user_mode);
/* Initialize SP for preemptive mode.
* Build initial ISR frame on stack with mepc pointing to task entry.
+ * For U-mode tasks, frame is built on kernel stack; for M-mode on user
+ * stack.
*/
void *stack_top = (void *) ((uint8_t *) tcb->stack + new_stack_size);
- tcb->sp = hal_build_initial_frame(stack_top, task_entry, user_mode);
+ tcb->sp =
+ hal_build_initial_frame(stack_top, task_entry, user_mode,
+ tcb->kernel_stack, tcb->kernel_stack_size);
printf("task %u: entry=%p stack=%p size=%u prio_level=%u time_slice=%u\n",
tcb->id, task_entry, tcb->stack, (unsigned int) new_stack_size,
@@ -842,7 +957,11 @@ int32_t mo_task_cancel(uint16_t id)
CRITICAL_LEAVE();
/* Free memory outside critical section */
+ if (tcb->mspace)
+ mo_memspace_destroy(tcb->mspace);
free(tcb->stack);
+ if (tcb->kernel_stack)
+ free(tcb->kernel_stack);
free(tcb);
return ERR_OK;
}