|
|
@ -20,8 +20,10 @@ |
|
|
|
#define SCTLR_EL1_VAL_MMU_DISABLED (SCTLR_EL1_RESERVED | SCTLR_EL1_EE_LITTLE_ENDIAN | SCTLR_EL1_I_CACHE_DISABLED | SCTLR_EL1_D_CACHE_DISABLED | SCTLR_EL1_MMU_DISABLED) |
|
|
|
|
|
|
|
// TODO define these better |
|
|
|
#define MEMORY_LEVEL2_TABLE_EL1 translation_table_level2_el1 |
|
|
|
#define MEMORY_LEVEL2_TABLE_EL0 translation_table_level2_el0 |
|
|
|
// #define MEMORY_LEVEL2_TABLE_EL1 translation_table_level2_el1 |
|
|
|
// #define MEMORY_LEVEL2_TABLE_EL0 translation_table_level2_el0 |
|
|
|
#define MEMORY_LEVEL2_TABLE_EL1 =0xf00000 |
|
|
|
#define MEMORY_LEVEL2_TABLE_EL0 =0xe00000 |
|
|
|
|
|
|
|
.section ".text.boot" // Put this at start of kernel |
|
|
|
|
|
|
@ -152,7 +154,7 @@ _start: |
|
|
|
// (todo hardware support) The arm_high_peri mode would need to be enabled it seems to allow |
|
|
|
// 8 GiB RAM. I wonder if that's an actual requirement. |
|
|
|
ldr x2, =31 |
|
|
|
lsr x2, x2, #16 // to t1sz |
|
|
|
lsl x2, x2, #16 // to t1sz |
|
|
|
ldr x3, =31 // t0sz |
|
|
|
orr x1, x1, x2 // t1sz |
|
|
|
orr x1, x1, x3 // t0sz |
|
|
@ -164,7 +166,7 @@ _start: |
|
|
|
// memory behaviors |
|
|
|
// Device memory types: |
|
|
|
// (G)athering: allow multiple accesses to be merged into a single transaction |
|
|
|
// (R)e-ordering: allow accesses to the same device to be re-ordered with respect to each other |
|
|
|
// (R)e-ordering: allow accesses to the same device to be re-ordered with respect to each other |
|
|
|
// (E)arly write acknowledgement: Whether intermediate write buffer between processor and slave |
|
|
|
// can send an acknowledgement of write completion rather than wait for the device to confirm |
|
|
|
// the write. |
|
|
@ -193,11 +195,11 @@ _start: |
|
|
|
|
|
|
|
// See ARM DAI 0527A Setting up the MMU |
|
|
|
// We start at level 2 thanks to our lower TCR_EL1.TnSZ |
|
|
|
adrp x0, MEMORY_LEVEL2_TABLE_EL1 // must be 64KiB aligned |
|
|
|
ldr x0, MEMORY_LEVEL2_TABLE_EL1 // must be 64KiB aligned |
|
|
|
// Set the translation table for addresses with upper bits all 1 (the TTBR1_EL1 range) to our EL1 level 2 table |
|
|
|
msr ttbr1_el1, x0 |
|
|
|
// Zero out our level 2 table |
|
|
|
adrp x1, MEMORY_LEVEL2_TABLE_EL1 |
|
|
|
ldr x1, MEMORY_LEVEL2_TABLE_EL1 |
|
|
|
ldr w2, =16 |
|
|
|
5: |
|
|
|
cbz w2, 6f |
|
|
@ -207,7 +209,7 @@ _start: |
|
|
|
cbnz w2, 5b // Continue setting if non-zero |
|
|
|
6: |
|
|
|
// Zero out our level 2 table EL0 |
|
|
|
adrp x1, MEMORY_LEVEL2_TABLE_EL0 |
|
|
|
ldr x1, MEMORY_LEVEL2_TABLE_EL0 |
|
|
|
ldr w2, =16 |
|
|
|
7: |
|
|
|
cbz w2, 8f |
|
|
@ -216,23 +218,53 @@ _start: |
|
|
|
sub w2, w2, #1 |
|
|
|
cbnz w2, 7b // Continue setting if non-zero |
|
|
|
8: |
|
|
|
ldr x0, MEMORY_LEVEL2_TABLE_EL1 // must be 64KiB aligned |
|
|
|
// Level 2 translation table uses bits [32:29] to index 16 blocks of 512 MiB each, either block or table descriptors |
|
|
|
// For EL1, we are going to flat map the first 512 MiB as a block of normal memory |
|
|
|
ldr x1, =0x0 // must be 64KiB aligned |
|
|
|
ldr x2, =0xffff0000 // Mask to 64KiB just in case the alignment is off (not sure if this is necessary) |
|
|
|
and x2, x1, x2 |
|
|
|
ldr x3, =1 // Desired memory attribute index (index in MAIR), we want Normal memory |
|
|
|
lsr x3, x3, #2 // AttrIndex is bits [4:2] in lower attributes |
|
|
|
lsl x3, x3, #2 // AttrIndex is bits [4:2] in lower attributes |
|
|
|
orr x2, x2, x3 // Add attribute (Normal memory) |
|
|
|
ldr x3, =1 // Desired access permissions (allow read write from EL0) |
|
|
|
lsl x3, x3, #6 |
|
|
|
orr x2, x2, x3 // Set access permissions |
|
|
|
ldr x3, =3 // Desired shareability (inner shareable) |
|
|
|
lsl x3, x3, #8 |
|
|
|
orr x2, x2, x3 // Set shareability |
|
|
|
ldr x3, =1 // Desired access flag (accessed) See R XFXTY and below in ARM DDI 0487J.a! |
|
|
|
lsl x3, x3, #10 |
|
|
|
orr x2, x2, x3 // Set access flag |
|
|
|
orr x2, x2, #1 // Set [1] block entry (=0) and [0] valid (=1) |
|
|
|
// All upper attributes are 0 |
|
|
|
str x2, [x0], #56 // Set the first entry in our table, then increment to entry[7] for peripherals (8 * 7 bytes) |
|
|
|
// Set block 7 to be device nGnRnE memory for peripherals |
|
|
|
str x2, [x0], #8 // Set the first entry in our table, incrementing for the next entry |
|
|
|
// Set the remaining entries (note we only do 8 entries total for 4 GiB Rpi, hence the 7) |
|
|
|
ldr x5, =7 // number of entries remaining to set |
|
|
|
ldr x6, =0x20000000 // start address |
|
|
|
ldr x7, =0x20000000 // increment |
|
|
|
9: |
|
|
|
// Set all other blocks to be device nGnRnE memory for peripherals |
|
|
|
// (todo performance) Note that we only need to map 64 MiB, but we'll do the whole block for |
|
|
|
// simplicity's sake. |
|
|
|
ldr x2, =0xe0000000 // Start of block 7 |
|
|
|
//ldr x2, =0xe0000000 // Start of block 7 |
|
|
|
mov x2, x6 |
|
|
|
// Device nGnRnE = 00 AttrIndex, so nothing to set for that |
|
|
|
ldr x3, =1 // Desired access permissions (allow read write from EL0) |
|
|
|
lsl x3, x3, #6 |
|
|
|
orr x2, x2, x3 // Set access permissions |
|
|
|
ldr x3, =3 // Desired shareability (inner shareable) |
|
|
|
lsl x3, x3, #8 |
|
|
|
orr x2, x2, x3 // Set shareability |
|
|
|
ldr x3, =1 // Desired access flag (accessed) See R XFXTY and below in ARM DDI 0487J.a! |
|
|
|
lsl x3, x3, #10 |
|
|
|
orr x2, x2, x3 // Set access flag |
|
|
|
orr x2, x2, #1 // Set [1] block entry (=0) and [0] valid (=1) |
|
|
|
str x2, [x0] |
|
|
|
str x2, [x0], #8 |
|
|
|
add x6, x6, x7 // move to next physical address |
|
|
|
sub x5, x5, #1 |
|
|
|
cbnz x5, 9b |
|
|
|
10: |
|
|
|
// For EL1/OS memory, we will leave the rest of the entries as invalid except entry[7], which |
|
|
|
// is where our memory mapped peripherals exist starting at 0xfe00_0000. |
|
|
|
// So, with the low mode peripheral base at 0xfe00_0000, we use at least the top 32 MiB of block [7]. |
|
|
@ -267,7 +299,8 @@ _start: |
|
|
|
// peripherals. The peripherals document confusingly puts 0x0_fe00_0000 as the base due to |
|
|
|
// "legacy master addresses" which I don't really understand. |
|
|
|
// For EL0, we are going to leave it as invalid since I'm not doing user space yet |
|
|
|
adrp x0, MEMORY_LEVEL2_TABLE_EL0 // must be 64KiB aligned |
|
|
|
// TODO TODO Set up EL0 table |
|
|
|
ldr x0, MEMORY_LEVEL2_TABLE_EL1 // must be 64KiB aligned |
|
|
|
// Set the translation table for addresses with upper bits 0 to our level 2 table |
|
|
|
msr ttbr0_el1, x0 |
|
|
|
|
|
|
@ -294,29 +327,35 @@ _start: |
|
|
|
// Trigger exception to take us to EL1 |
|
|
|
eret |
|
|
|
el1_entry: |
|
|
|
// Before enabling virtual memory, we need to get the physical address of our main function |
|
|
|
ldr x9, =main |
|
|
|
ldr x1, =_start |
|
|
|
// We MUST initialize the stack AFTER the EL has changed, because we're using a new stack |
|
|
|
// pointer at this EL. Took me way too long to figure this out. |
|
|
|
// TODO What should this value actually be? The stack seems to grow down, so this would start |
|
|
|
// at 0x80000 and work towards 0x0. |
|
|
|
mov sp, 0x80000 |
|
|
|
|
|
|
|
/* Enable MMU virtual memory */ |
|
|
|
mrs x0, sctlr_el1 |
|
|
|
// orr x0, x0, #1 // Set [M] bit to enable MMU |
|
|
|
orr x0, x0, #1 // Set [M] bit to enable MMU |
|
|
|
msr sctlr_el1, x0 |
|
|
|
isb // Force changes for next instruction |
|
|
|
|
|
|
|
// We MUST initialize the stack AFTER the EL has changed, because we're using a new stack |
|
|
|
// pointer at this EL. Took me way too long to figure this out. |
|
|
|
// TODO What should this value actually be? The stack seems to grow down, so this would start |
|
|
|
// at 0x80000 and work towards 0x0. |
|
|
|
mov sp, x1 |
|
|
|
// Stack pointer is now virtual |
|
|
|
mov x0, 0xffff000000000000 |
|
|
|
add sp, sp, x0 |
|
|
|
|
|
|
|
ldr x0, =main |
|
|
|
blr x0 |
|
|
|
|
|
|
|
// LEFT OFF: First print contents of our translation tables, just to make sure my asm is okay |
|
|
|
// Next, double check my setting up of ttbrN and TCR_EL1 |
|
|
|
// Read again how the addresses need to change. Do my tables need to have their virtual addresses? |
|
|
|
// I found some messed up lsr -> lsl, and added more lower attributes as per the boot code manual |
|
|
|
// I need to really understand how the addresses change after shared is enabled, because I'm |
|
|
|
// just fucking around right now without understanding. |
|
|
|
// Jump to main (note that we never expect this to return) |
|
|
|
//bl main |
|
|
|
// orr x9, x9, #0xffff000000000000 |
|
|
|
br x9 // virtual memory now enabled; this is main |
|
|
|
//br x9 // virtual memory now enabled; this is main |
|
|
|
// If main returns, halt |
|
|
|
b 1b |
|
|
|
|
|
|
|