Browse Source

Investigation on virtual memory continues

master
Macoy Madson 3 months ago
parent
commit
1cb2582c34
  1. 6
      ReadMe.org
  2. 21
      src/OS.cake
  3. 138
      src/boot.S

6
ReadMe.org

@ -22,10 +22,14 @@ Put the following for ~config.txt~:
- Copy ~kernel8.img~ to the SD card
* References
Raspberry Pi:
- [[https://www.rpi4os.com/][Rpi 4 OS tutorial]]
- [[https://developer.arm.com/documentation/den0024/a/][Arm Programmer's guide]]
- [[https://github.com/raspberrypi/firmware/wiki/Mailbox-property-interface][Mailbox property interface]]
Arm:
- [[https://www.arm.com/architecture/learn-the-architecture/a-profile][Learn the architecture]]
- [[https://developer.arm.com/documentation/den0024/a/][Arm Programmer's guide]]
* My stumbling blocks
- Not having *all* the firmware on the SD card
- Not writing the LED blink code correctly, i.e. I wasn't waiting while the LED was off, so it was just solid on. Viewing the disassembly helped clue me in to it.

21
src/OS.cake

@ -218,6 +218,27 @@
(blit-string (addr s-font-heading) 500 800 "Exception level 2" log-in-color-rgb))
((= exception-level 3)
(blit-string (addr s-font-heading) 500 800 "Exception level 3" log-in-color-rgb)))
;; Show how much physical address space the CPU implements.
;; get_implemented_physical_address_size is defined in src/boot.S; it returns the low 4 bits of
;; ID_AA64MMFR0_EL1, which is an encoded size rather than a byte count.
(declare-extern-function get_implemented_physical_address_size (&return int))
(var implemented-physical-address-size int (get_implemented_physical_address_size))
;; Translate the encoded value into a readable size. Values 0 through 6 are handled below; any
;; other value falls through to the "Unknown" message.
(cond
((= implemented-physical-address-size 0)
(blit-string (addr s-font-heading) 500 900 "Implemented physical address size: 4 GiB" log-in-color-rgb))
((= implemented-physical-address-size 1)
(blit-string (addr s-font-heading) 500 900 "Implemented physical address size: 64 GiB" log-in-color-rgb))
((= implemented-physical-address-size 2)
(blit-string (addr s-font-heading) 500 900 "Implemented physical address size: 1 TiB" log-in-color-rgb))
((= implemented-physical-address-size 3)
(blit-string (addr s-font-heading) 500 900 "Implemented physical address size: 4 TiB" log-in-color-rgb))
((= implemented-physical-address-size 4)
(blit-string (addr s-font-heading) 500 900 "Implemented physical address size: 16 TiB" log-in-color-rgb))
((= implemented-physical-address-size 5)
(blit-string (addr s-font-heading) 500 900 "Implemented physical address size: 256 TiB" log-in-color-rgb))
((= implemented-physical-address-size 6)
(blit-string (addr s-font-heading) 500 900 "Implemented physical address size: 4 PiB" log-in-color-rgb))
;; Fallback for any encoding not listed above.
(true
(blit-string (addr s-font-heading) 500 900 "Implemented physical address size: Unknown" log-in-color-rgb)))
;; (blit-string (addr s-font-body) 500 770 "(This doesn't actually work yet)" log-in-color-rgb)
)

138
src/boot.S

@ -9,6 +9,16 @@
#define SPSR_EL2_TO_EL1_VAL \
(SPSR_EL2_D | SPSR_EL2_A | SPSR_EL2_I | SPSR_EL2_F | SPSR_EL2_MODE_EL1H)
// SCTLR_EL1 (EL1 system control register) field values.
// Reserved bits that are written as 1: [29:28], [23:22], [20] and [11].
// The whole expression is parenthesized so the macro expands safely inside a larger expression
// (for example next to ~, & or a shift), not only inside a chain of | operators.
#define SCTLR_EL1_RESERVED ((3 << 28) | (3 << 22) | (1 << 20) | (1 << 11))
// The remaining fields are spelled out even where the value is 0, to document the choice.
#define SCTLR_EL1_EE_LITTLE_ENDIAN (0 << 25)
#define SCTLR_EL1_EOE_LITTLE_ENDIAN (0 << 24)
#define SCTLR_EL1_I_CACHE_DISABLED (0 << 12)
#define SCTLR_EL1_D_CACHE_DISABLED (0 << 2)
#define SCTLR_EL1_MMU_DISABLED (0 << 0)
#define SCTLR_EL1_MMU_ENABLED (1 << 0)
// Value written to SCTLR_EL1 before dropping to EL1: reserved bits only, with the endianness,
// cache and MMU fields listed here all 0.
#define SCTLR_EL1_VAL_MMU_DISABLED \
(SCTLR_EL1_RESERVED | SCTLR_EL1_EE_LITTLE_ENDIAN | SCTLR_EL1_I_CACHE_DISABLED | \
SCTLR_EL1_D_CACHE_DISABLED | SCTLR_EL1_MMU_DISABLED)
.section ".text.boot" // Put this at start of kernel
.global _start
@ -87,6 +97,124 @@ _start:
//cmp x1, #2
//b.ne 1b
//
// Set up virtual memory
//
// Set cache granularity
// (todo performance) Set Translation table shareability/cacheability as per ARM DEN0024A 12-18 section 12.5
// NOTE: TG1 and TG0 encode the same granule size differently (see the 101811_0103_01_en quote
// below). 64KiB is 0b11 in TG1 but 0b01 in TG0; 0b11 is a reserved encoding in TG0.
ldr x1, =(3 << 30) // TG1 [31:30]: granule for the TTBR1_EL1 (kernel space) range, 0b11=64KiB (2^16 bytes)
ldr x2, =(1 << 14) // TG0 [15:14]: granule for the TTBR0_EL1 (user space) range, 0b01=64KiB
orr x1, x1, x2
// Set number of significant bits. We want to eliminate table level 0 and 1 since they are
// unnecessarily large for 64KiB blocks, so we set the TnSZ to skip it.
// See R FMBKV in ARM DDI 0487J.a
// "The size offset of the memory region addressed by TTBR1_EL1. The region size is 2^(64-T1SZ) bytes."
// "Next, imagine you set T0SZ to 34:
// 64 - T0SZ = 30-bit address space (address bits 29:0)"
// Granule size is 64KiB, which according to ARM DEN0024A 12-16:
// "Bits 47:42 of the Virtual Address select a descriptor from the 64 entry L1 table.
// Each of these table entries spans a 4TB range and points to an L2 table. Within
// that 8192 entry L2 table, bits 41:29 are used as index to select an entry and each
// entry points to either a 512 MB block or an L2 table. At the final translation stage,
// bits 28:16 index into an 8192 entry L3 table and each entry points to a 64kB
// block."
// The ARM document 101811_0103_01_en ("Learn the architecture - AArch64 memory management")
// helps clear this up for me.
//
// We ignore [47:42] (stage 1 lookup) by reducing our address space with TnSZ.
// Only use [32:29] (table level 2), 4 bits to index 16 blocks of 512 MiB each covering 8 GiB
// and [28:16] (table level 3) to index 8192 pages of 64 KiB each.
// which allows us to either have 512 MiB blocks resolved straight from stage 2 or 64 KiB
// blocks from stage 3.
// These numbers were picked based on the following:
// - Arm recommends the largest pages possible in order to reduce TLB cache misses and table
// sizes. This is intuitive. The drawback is lower granularity, but the type of processes I'm
// imagining (few, large processes, possibly with many "threads" which share memory) mean the
// expense of the smallest page being 64 KiB is not a concern. (If you had a million tiny
// processes, it might make sense going with more granular processes.)
// - The Raspberry Pi 4 B has a maximum of 8 GiB physical RAM. I don't think it's a good idea to
// abstract away memory such that you don't know exactly when your pages are in RAM or on
// disk. If you suddenly have to load from disk, it could cause frame stutters etc. With this
// in mind, it does not make sense to have a larger address space than is possible in RAM.
// The argument could be made to have 16 GiB space to leave room for separate device memory,
// which I may find necessary. At this level it won't be hard to increase the space slightly.
// "TCR_EL1 has two separate fields that control the granule size for the kernel space
// and the user space virtual address ranges. These fields are called TG1 for kernel
// space and TG0 for user space. A potential problem for programmers is that these
// two fields have different encodings." 101811_0103_01_en
// Number of bits of address = 64 - TnSZ; we want a 33 bit address space (8 GiB), so TnSZ = 31
mov x2, #31
// Both size fields are 6 bits wide: T0SZ is TCR_EL1[5:0] and T1SZ is TCR_EL1[21:16].
orr x1, x1, x2 // T0SZ = 31 (TTBR0_EL1 range)
orr x1, x1, x2, lsl #16 // T1SZ = 31 (TTBR1_EL1 range)
// Translation control register: write the combined granule (TG1/TG0) and size (T1SZ/T0SZ)
// fields accumulated in x1. All other TCR_EL1 fields are left 0 for now (see the todo above).
msr tcr_el1, x1
isb // Force changes to registers to be seen before we enable the MMU
// Program the Memory Attribute Indirection Register (MAIR_EL1). It holds the table of memory
// behaviors that are available to choose from.
// Device memory types are described by three properties:
// (G)athering: allow multiple accesses to be merged into a single transaction
// (R)e-ordering: allow accesses to the same device to be re-ordered with respect to each other
// (E)arly write acknowledgement: Whether intermediate write buffer between processor and slave
// can send an acknowledgement of write completion rather than wait for the device to confirm
// the write.
// There is room for 8 attribute entries, one byte each. See ARM DDI 0487J.a D19-6894
// Entry [0], byte 0: Device memory, non-gather, no re-order, no early write acknowledgement
// (a.k.a. strict order, slow!) = 0x00
mov x1, xzr
// Entry [1], byte 1: Normal memory, for now (todo performance) non-cacheable (both inner and
// outer) = 0b01000100
mov x2, #(0x44 << 8)
// Merge the entries and install them
orr x1, x1, x2
msr mair_el1, x1
// Set up page tables
// Table descriptor for 64KiB, 48-bit address (since TCR_EL1.DS is 0, otherwise it'd be 52 bits)
// [63:59] attributes, [58:51] ignored, [50:48] reserved 0, [47:16] next level table address,
// [15:12] reserved 0, [11:2] ignored, [1] Is block entry (0) or table entry (1), [0] is valid
// The [63:59] attributes seem to be dealing with execute never and virtualization stuff.
// Block descriptor for 64KiB 48-bit address
// [63:50] attributes, [49:48] reserved 0, [47:29] block address,
// [28:17] reserved 0, [16] nT, [15:12] reserved 0, [11:2] lower attributes,
// [1] Is block entry (0) or table entry (1), [0] is valid
// Page descriptor for 64KiB 48-bit address (level 3 entries only)
// [63:50] attributes, [49:48] reserved 0, [47:16] output address,
// [15:12] reserved 0, [11:2] lower attributes,
// [1] must be 1 for a page entry (at level 3 a 0 here makes the entry invalid), [0] is valid
// In block and page descriptors
// See ARM DAI 0527A Setting up the MMU
// adr x0, ttb0_base // must be 64KiB aligned
// // Set the translation table for addresses with upper bits 0 to our level 1 table stored at ttb0_base
// TODO: I think ttbr1_el1 needs to be set as well!
// msr ttbr0_el1, x0
// // Level 1 translation table
// ldr x1, =level2_pagetable // must be 64KiB aligned
// ldr x2, =0xffff0000 // Mask to 64KiB just in case the alignment is off (not sure if this is necessary)
// and x2, x1, x2
// orr x2, x2, 0x3 // Set [1] table entry [0] valid
// // All upper attributes are 0
// str x2, [x0], #8 // Set the first entry in our table
// All other entries are invalid (our first entry represents 64 GiB of space over the 4 TiB total range)
// As per I FTBXR in ARM DDI 0487J.a, I should be able to reduce this by setting TCR_ELx.TnSZ
// Minimum: 16 Maximum: 39
// At stage 2, the effective minimum is 32 for 32 bit PA
// Level 2 translation table
//
// Final setup before EL2->EL1
//
/* setup SCTLR_EL1 */
// SCTLR_EL1_VAL_MMU_DISABLED is the reserved bits OR'd with fields that are all 0, so EL1
// starts little-endian with the instruction cache, data cache and MMU disabled.
ldr x0, =SCTLR_EL1_VAL_MMU_DISABLED
msr SCTLR_EL1, x0
/* Enable MMU virtual memory */
// Currently disabled: the read-modify-write below stays commented out, so the M bit is left
// clear and the MMU remains off.
// mrs x0, sctlr_el1
// orr x0, x0, #1 // Set [M] bit to enable MMU
// msr sctlr_el1, x0
// isb // Force changes for next instruction
// Tell the hypervisor we want AArch64 in all lower ELs
mrs x0, hcr_el2
orr x0, x0, #(1<<31)
@ -104,7 +232,8 @@ _start:
el1_entry:
// We MUST initialize the stack AFTER the EL has changed, because we're using a new stack
// pointer at this EL. Took me way too long to figure this out.
// TODO What should this value actually be?
// TODO What should this value actually be? The stack seems to grow down, so this would start
// at 0x80000 and work towards 0x0.
ldr x1, =_start
mov sp, x1
// Jump to main (note that we never expect this to return)
@ -117,3 +246,10 @@ get_el:
mrs x0, CurrentEL
lsr x0, x0, #2
ret
// int get_implemented_physical_address_size(void)
// Out: x0 = bits [3:0] of ID_AA64MMFR0_EL1, an encoded physical address size (not a byte
// count). src/OS.cake translates the encoding into a readable size.
// On Raspberry Pi 4B, it reported 16 TiB
.globl get_implemented_physical_address_size
get_implemented_physical_address_size:
mrs x0, ID_AA64MMFR0_EL1
ubfx x0, x0, #0, #4 // Keep only the low 4 bits, zero-extended
ret

Loading…
Cancel
Save