1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382
//! Arch-specific process switch functions //! //! This modules describe low-level functions and structures needed to perform a process switch use crate::process::ThreadStruct; use alloc::sync::Arc; use core::mem::size_of; use crate::i386::gdt::{GDT, MAIN_TASK}; use crate::i386::gdt::GdtIndex; /// The hardware context of a paused thread. It contains just enough registers to get the thread /// running again. /// /// All other registers are to be saved on the thread's kernel stack before scheduling, /// and restored right after re-schedule. /// /// Stored in the ThreadStruct of every thread. #[derive(Debug)] pub struct ThreadHardwareContext { /// The top of the stack, where all other registers are saved. esp: usize, } impl Default for ThreadHardwareContext { /// Creates an empty ThreadHardwareContext. fn default() -> Self { // the saved esp will be overwritten on schedule-out anyway Self { esp: 0x55555555 } } } /// Performs the process switch, switching from currently running process A, to process B. /// /// The process switch is composed of two parts : /// /// * The "schedule out" part, where A takes care of saving its registers, prepares itself to be left, /// and performs the switch by loading B's registers. /// A is now stopped and waiting to be scheduled in again. /// * The "schedule in" part, where B which was previously scheduled out by another process switch, /// now restores the registers it had saved on the stack, finalises the switch, /// and resumes its previous activity. /// /// ### Schedule out: /// /// The schedule-out code performs the following steps: /// /// 1. change A's state from Running to Scheduled /// 2. change B's state from Scheduled to Running /// 3. switch to using B's memory space. KernelLand of A is copied to B at this point. /// 4. save registers of A on its stack /// 5. save special "hardware_context" registers of A in its ProcessStruct. /// This is only the register containing the pointer to the top of the stack /// where all other registers are saved. /// 6. load B's special hardware_contexts registers. /// This is where the process switch actually happens. Now we are running on B's stack, /// and Program Counter was moved to B's schedule-in routine /// /// ### Schedule in: /// /// 1. restore the registers that it had saved on the stack /// 2. return to what it was doing before /// /// ### Switching to a fresh process: /// /// In the special case where B is a newly born process, and it's its first time being scheduled (Owww, so cute), /// it hasn't been scheduled out before, and doesn't have anything on the stack yet. /// We choose to use the same schedule-in method for both cases, that means the schedule-in will /// expect the new process to have a bunch of values on the stack that will be pop'ed into registers, /// and finally ret' to a saved program counter on the stack. /// This program counter can be used to control where the process will end-up on it's first schedule, /// likely just a function that will jump straight to userspace. /// /// The stack can be prepared for schedule-in by the function prepare_for_first_schedule(). /// /// # Return /// /// Returns an Arc to the current ProcessSwitch after the switch, which was passed on during the switch. /// /// # Panics /// /// Panics if the locks protecting the ProcessStruct of current or B process cannot be obtained. /// Panics if the locks protecting the MAIN_TASK TSS or DOUBLE_FAULT_TSS cannot be obtained. /// /// # Safety /// /// Interrupts definitely must be masked when calling this function #[inline(never)] // we need that sweet saved ebp + eip on the stack pub unsafe extern "C" fn process_switch(thread_b: Arc<ThreadStruct>, thread_current: Arc<ThreadStruct>) -> Arc<ThreadStruct> { let esp_to_load = { // todo do not try to change cr3 if thread_b belongs to the same process. //let mut thread_current_lock_pmemory = thread_current.pmemory.try_lock() // .expect("process_switch cannot get current thread' lock for writing"); let mut thread_b_lock_pmemory = thread_b.process.pmemory.try_lock() .expect("process_switch cannot get destination thread' lock for writing"); let mut thread_current_lock_phwcontext = thread_current.hwcontext.try_lock() .expect("process_switch cannot get current thread' lock for writing"); let thread_b_lock_phwcontext = thread_b.hwcontext.try_lock() .expect("process_switch cannot get destination thread' lock for writing"); // Switch the memory pages thread_b_lock_pmemory.switch_to(); // Update the TLS segments. They are not loaded yet. let mut gdt = GDT .r#try().expect("GDT not initialized") .try_lock().expect("Could not lock GDT"); gdt.table[GdtIndex::UTlsRegion as usize].set_base(thread_b.tls_region.addr() as u32); gdt.table[GdtIndex::UTlsElf as usize].set_base(thread_b.tls_elf.lock().addr() as u32); gdt.commit(None, None, None, None, None, None); let current_esp: usize; llvm_asm!("mov $0, esp" : "=r"(current_esp) : : : "intel", "volatile"); // on restoring, esp will point to the top of the saved registers let esp_to_save = current_esp - (8 + 1 + 1) * size_of::<usize>(); thread_current_lock_phwcontext.esp = esp_to_save; let esp_to_load = thread_b_lock_phwcontext.esp; // unlock the threads, they become available to be taken between now and when B will take // them again on schedule in, but since there is no SMP and interrupts are off, // this should be ok ... drop(thread_b_lock_pmemory); //drop(thread_current_lock_pmemory); drop(thread_b_lock_phwcontext); drop(thread_current_lock_phwcontext); esp_to_load }; // Set IOPB back to "nothing allowed" state // todo do not change iopb if thread_b belongs to the same process. // MAIN_TSS should otherwise only be locked during DOUBLE_FAULTING, // in which case we really shouldn't be context-switching. let mut main_tss = MAIN_TASK.try_lock() .expect("Cannot lock main tss"); for ioport in &thread_current.process.capabilities.ioports { let ioport = *ioport as usize; main_tss.iopb[ioport / 8] = 0xFF; } drop(main_tss); // current is still stored in scheduler's global CURRENT_PROCESS, so it's not dropped yet. drop(thread_current); // we pass a pointer to its ThreadStruct to the thread we're about to switch to. // Arc::into_raw does not decrement the reference count, so it's temporarily leaked. // This also prevents thread B to be dropped when we're about to switch to it. let thread_b_whoami = Arc::into_raw(thread_b); let whoami: *const ThreadStruct; llvm_asm!(" // Push all registers on the stack, swap to B's stack, and jump to B's schedule-in schedule_out: lea eax, resume // we push a callback function, called at the end of schedule-in push eax pushad // pushes eax, ecx, edx, ebx, ebp, original esp, ebp, esi, edi pushfd // pushes eflags // load B's stack, and jump to its schedule-in mov esp, $1 // thread B resumes here schedule_in: // Ok ! Welcome again to B ! // restore the saved registers popfd // pop eflags mov [esp], edi // edi contains our precious ThreadStruct ptr, we do not want to lose it. popad // pop edi (overwritten), esi, ebp, ebx, edx, ecx, eax. Pushed esp is ignored ret // ret to the callback pushed on the stack // If this was not the first time the thread was scheduled-in, // it ends up here resume: // return to rust code as if nothing happened " : "={edi}"(whoami) // at re-schedule, $edi contains a pointer to our ThreadStruct : "r"(esp_to_load), "{edi}"(thread_b_whoami) : "eax" : "volatile", "intel"); // ends up here if it was not our first schedule-in // recreate the Arc to our ThreadStruct from the pointer that was passed to us let me = unsafe { Arc::from_raw(whoami) }; // MAIN_TSS should have been unlocked during schedule-out. Re-take it. let mut main_tss = MAIN_TASK.try_lock() .expect("Cannot lock main tss"); // Set the ESP0 main_tss.tss.esp0 = me.kstack.get_stack_start() as u32; // Set IOPB for ioport in &me.process.capabilities.ioports { let ioport = *ioport as usize; main_tss.iopb[ioport / 8] &= !(1 << (ioport % 8)); } me } /// Prepares the thread for its first schedule by writing default values at the start of the /// stack that will be loaded in the registers in schedule-in. /// See process_switch() documentation for more details. /// /// # Safety /// /// This function will definitely fuck up your stack, so make sure you're calling it on a /// never-scheduled thread's empty-stack. #[allow(clippy::fn_to_numeric_cast)] pub unsafe fn prepare_for_first_schedule(t: &ThreadStruct, entrypoint: usize, userspace_args: (usize, usize), userspace_stack: usize) { #[repr(packed)] #[allow(clippy::missing_docs_in_private_items)] struct RegistersOnStack { eflags: u32, edi: u32, esi: u32, ebp: u32, esp: u32, ebx: u32, edx: u32, ecx: u32, eax: u32, callback_eip: u32 // -------------- // poison ebp // poison eip }; let stack_start = t.kstack.get_stack_start() as u32; // * $esp * eflags // ... // * puhad's ebp * 0xaaaaaaaa -+ // ... | // * callback eip * ... | // -------------------------- | // * poison ebp * 0x00000000 <---+ < "get_stack_start()" // * poison eip * 0x00000000 let initial_registers = RegistersOnStack { // Please keep the order of those arguments - they are currently ordered // the same way `pushad; pushfd;` does. eflags: 0x00000000, // no flag set, seems ok edi: 0, // Overwritten by process_switch esi: 0, ebp: stack_start, // -+ esp: 0, // ignored by the popad anyway // | ebx: userspace_stack as u32, // | edx: userspace_args.1 as u32, // | ecx: userspace_args.0 as u32, // | eax: entrypoint as u32, // | callback_eip: first_schedule as u32 // | // -------------- | // poison ebp <------------------------+ * 'stack_start' * // poison eip }; let initial_registers_stack_top = (t.kstack.get_stack_start() - ::core::mem::size_of::<RegistersOnStack>()) as *mut RegistersOnStack; ::core::ptr::write(initial_registers_stack_top, initial_registers); // put the pointer to the top of the structure as the $esp to be loaded on schedule-in t.hwcontext.lock().esp = initial_registers_stack_top as usize; } /// The function ret'd on, on a thread's first schedule - as setup by the prepare_for_first_schedule. /// /// At this point, interrupts are still off. This function should ensure the thread is properly /// switched (set up ESP0, IOPB and whatnot) and call [`scheduler_first_schedule`]. /// /// # Safety: /// /// * Interrupts must be disabled. /// * Arguments must respect the [`prepare_for_first_schedule`] ABI, and be popped into registers. /// /// [`scheduler_first_schedule`]: crate::scheduler::scheduler_first_schedule. #[naked] unsafe fn first_schedule() { // just get the ProcessStruct pointer in $edi, the entrypoint in $eax, and call a rust function unsafe { llvm_asm!(" push ebx push edx push ecx push eax push edi call ${0:P} " : : "s"(first_schedule_inner as *const u8) : : "volatile", "intel"); } /// Stack is set-up, now we can run rust code. extern "C" fn first_schedule_inner(whoami: *const ThreadStruct, entrypoint: usize, arg1: usize, arg2: usize, userspace_stack: usize) -> ! { // reconstruct an Arc to our ProcessStruct from the leaked pointer let current = unsafe { Arc::from_raw(whoami) }; // MAIN_TSS must have been unlocked by now. let mut main_tss = MAIN_TASK.try_lock() .expect("Cannot lock main tss"); // Set the ESP0 main_tss.tss.esp0 = current.kstack.get_stack_start() as u32; // todo do not touch iopb if we come from a thread of the same process. // Set IOPB for ioport in ¤t.process.capabilities.ioports { let ioport = *ioport as usize; main_tss.iopb[ioport / 8] &= !(1 << (ioport % 8)); } drop(main_tss); // unlock it // call the scheduler to finish the high-level process switch mechanics unsafe { // safe: interrupts are off crate::scheduler::scheduler_first_schedule(current, || jump_to_entrypoint(entrypoint, userspace_stack, arg1, arg2)); } unreachable!() } } /// Jumps to Userspace, and run a userspace program. /// /// This function is called on the first schedule of a process or thread, /// after all the process_switch mechanics is over, and the thread is good to go. /// /// It jumps to ring 3 by pushing the given `ep` and `userspace_stack_ptr` on the KernelStack, /// and executing an `iret`. /// /// Just before doing the `iret`, it clears all general-purpose registers. /// /// This way, just after the `iret`, cpu will be in ring 3, witl all of its registers cleared, /// `$eip` pointing to `ep`, and `$esp` pointing to `userspace_stack_ptr`. fn jump_to_entrypoint(ep: usize, userspace_stack_ptr: usize, arg1: usize, arg2: usize) -> ! { // gonna write constants in the code, cause not enough registers. // just check we aren't hard-coding the wrong values. const_assert_eq!((GdtIndex::UCode as u16) << 3 | 0b11, 0x2B); const_assert_eq!((GdtIndex::UData as u16) << 3 | 0b11, 0x33); const_assert_eq!((GdtIndex::UTlsRegion as u16) << 3 | 0b11, 0x3B); const_assert_eq!((GdtIndex::UTlsElf as u16) << 3 | 0b11, 0x43); const_assert_eq!((GdtIndex::UStack as u16) << 3 | 0b11, 0x4B); unsafe { llvm_asm!(" mov ax,0x33 // ds, es <- UData, Ring 3 mov ds,ax mov es,ax mov ax,0x3B // fs <- UTlsRegion, Ring 3 mov fs,ax mov ax, 0x43 // gs <- UTlsElf, Ring 3 mov gs,ax // Build the fake stack for IRET push 0x4B // Userland Stack, Ring 3 push $1 // Userspace ESP pushfd push 0x2B // Userland Code, Ring 3 push $0 // Entrypoint // Clean up all registers. Also setup arguments. // mov ecx, arg1 // mov edx, arg2 mov eax, 0 mov ebx, 0 mov ebp, 0 mov edi, 0 mov esi, 0 iretd " :: "r"(ep), "r"(userspace_stack_ptr), "{ecx}"(arg1), "{edx}"(arg2) : /* Prevent using eax as input, it's used early. */ "eax" : "intel", "volatile"); } unreachable!() }