ARM Integrator-CP ITPTMME Phase2
This page or section refers to its readers or editors using I, my, we or us. It should be edited to be in an encyclopedic tone. |
Processes & Threads, Exception Handling, And System Calls
In this phase we are going to improve our exception handling, rework our tasking system to support processes and threads (instead of tasks), and implement some basic system calls for demonstration.
I make a few minor changes and one or two major changes. You can find the full source with:
git clone http://kmcg3413.net/armthin.git git checkout origin/bPHASE1
This should give you the source with all the changes below. If you have been following along from the previous pages then you will only have to cover these simple changes. If you are new then I recommend you start from IRQ, Timer, And PIC.
New Define For Timer Support
/*
    Max number of ticks for a task.

    This value is programmed into the timer's load registers during timer
    initialization, so it is the number of timer ticks between the periodic
    interrupts that call the scheduler.
*/
#define KTASKTICKS 10000
This sets the interval, in timer ticks, after which a running thread is interrupted for a context switch. The lower the value, the higher the context-switch frequency and the shorter the maximum time a thread can execute before it is preempted.
Basic Linked List Utility Functions
I like these functions because they can work with any data structure that has the fields at the top. It can be cast to type LL and used that way.
/*
    Generic doubly linked list header.

    Any structure whose first two members are a `next` pointer followed by a
    `prev` pointer can be cast to LL and managed with ll_add and ll_rem.
*/
typedef struct _LL {
    struct _LL *next;
    struct _LL *prev;
} LL;

/*
    Insert item `i` in front of the node that `*p` currently points to and
    make `*p` point to the new item.

    p   address of the pointer that refers to the list (normally the head
        pointer); if it is 0 the item is only reset to an unlinked state
    i   item to insert; its first two fields must match LL

    The displaced node's `prev` is updated to point back at the new item so
    that ll_rem can later unlink nodes that are not at the head.
*/
void ll_add(void **p, void *i) {
    LL *item;
    LL *head;

    item = (LL*)i;
    if (!item) {
        return;
    }

    if (!p) {
        /* no list to attach to; leave the item cleanly unlinked */
        item->next = 0;
        item->prev = 0;
        return;
    }

    head = (LL*)*p;

    item->next = head;
    if (head) {
        /* take over whatever preceded the old node, then link it back to us */
        item->prev = head->prev;
        head->prev = item;
    } else {
        item->prev = 0;
    }

    *p = item;
}

/*
    Unlink item `i` from its list.

    p   address of the pointer that refers to the list (may be 0); if it
        currently points at `i` it is moved to a neighbour of `i`
    i   item to remove

    The removed item's own `next` and `prev` fields are left untouched, so a
    caller may still read them after removal.
*/
void ll_rem(void **p, void *i) {
    LL *item;

    item = (LL*)i;
    if (!item) {
        return;
    }

    if (item->prev) {
        item->prev->next = item->next;
    }

    if (item->next) {
        item->next->prev = item->prev;
    }

    if (p && *p == i) {
        if (item->prev) {
            *p = item->prev;
        } else {
            *p = item->next;
        }
    }
}
Separate Scheduler Function
I have added support for sleeping threads, and a wake up flag. This is by no means a correct design, but rather just a demonstration of how you could put threads to sleep and wake them up. It also supports a timeout which allows a thread to not only wake up with an external signal but also after a specified amount of time.
void ksched() {
KSTATE *ks;
KTHREAD *kt;
uint32 __lr, __sp, __spsr;
uintptr page;
ks = (KSTATE*)KSTATEADDR;
/* if valid process and thread then store */
if (ks->cproc && ks->cthread) {
/*
1. store register on stack in thread struct
2. access hidden registers and store in thread struct
*/
kt = ks->cthread;
kt->pc = ((uint32*)KSTACKEXC)[-1];
kt->r12 = ((uint32*)KSTACKEXC)[-2];
kt->r11 = ((uint32*)KSTACKEXC)[-3];
kt->r10 = ((uint32*)KSTACKEXC)[-4];
kt->r9 = ((uint32*)KSTACKEXC)[-5];
kt->r8 = ((uint32*)KSTACKEXC)[-6];
kt->r7 = ((uint32*)KSTACKEXC)[-7];
kt->r6 = ((uint32*)KSTACKEXC)[-8];
kt->r5 = ((uint32*)KSTACKEXC)[-9];
kt->r4 = ((uint32*)KSTACKEXC)[-10];
kt->r3 = ((uint32*)KSTACKEXC)[-11];
kt->r2 = ((uint32*)KSTACKEXC)[-12];
kt->r1 = ((uint32*)KSTACKEXC)[-13];
kt->r0 = ((uint32*)KSTACKEXC)[-14];
kt->cpsr = ((uint32*)KSTACKEXC)[-15];
/* switch to system mode get hidden registers then switch back */
asm("mrs r0, cpsr \n\
mov r1, r0 \n\
bic r0, r0, #0x1f \n\
orr r0, r0, #0x1f \n\
msr cpsr, r0 \n\
mov %[sp], sp \n\
mov %[lr], lr \n\
msr cpsr, r1 \n\
" : [sp]"=r" (__sp), [lr]"=r" (__lr));
kt->sp = __sp;
kt->lr = __lr;
}
if (!ks->cproc) {
/* initial start */
ks->cproc = ks->procs;
ks->cthread = ks->procs->threads;
}
while (1) {
if (ks->cthread) {
/* get next thread */
ks->cthread = ks->cthread->next;
}
/* if none get next process */
if (!ks->cthread) {
/* get next process */
if (ks->cproc) {
ks->cproc = ks->cproc->next;
}
/* if none get first process */
if (!ks->cproc) {
ks->cproc = ks->procs;
}
/* get first thread */
if (ks->cproc) {
ks->cthread = ks->cproc->threads;
} else {
ks->cthread = 0;
}
}
if (!ks->cthread) {
break;
}
/* if current thread is sleeping and current thread equals last thread */
if ((ks->cthread->flags & KTHREAD_SLEEPING) && (ks->cthread == kt)) {
PANIC("all-threads-sleeping");
}
/* only wakeup if it is sleeping */
if (ks->cthread->flags & KTHREAD_SLEEPING) {
if (ks->cthread->timeout > 0 && ks->ctime > ks->cthread->timeout) {
//kprintf("WOKE UP (timeout) %x\n", ks->cthread);
/* wake up thread if passed timeout */
ks->cthread->flags &= ~KTHREAD_SLEEPING;
ks->cthread->r0 = 0;
break;
}
/* wakeup thread is set to be woken up */
if (ks->cthread->flags & KTHREAD_WAKEUP) {
//kprintf("WOKE UP (signal) %x\n", ks->cthread);
ks->cthread->flags &= ~(KTHREAD_WAKEUP | KTHREAD_SLEEPING);
ks->cthread->r0 = ks->ctime - ks->cthread->timeout;
break;
}
} else {
/* run this thread */
break;
}
/* thread is sleeping or not able to run */
}
/* hopefully we got something or the system should deadlock */
kt = ks->cthread;
/*
load registers
*/
((uint32*)KSTACKEXC)[-1] = kt->pc;
((uint32*)KSTACKEXC)[-2] = kt->r12;
((uint32*)KSTACKEXC)[-3] = kt->r11;
((uint32*)KSTACKEXC)[-4] = kt->r10;
((uint32*)KSTACKEXC)[-5] = kt->r9;
((uint32*)KSTACKEXC)[-6] = kt->r8;
((uint32*)KSTACKEXC)[-7] = kt->r7;
((uint32*)KSTACKEXC)[-8] = kt->r6;
((uint32*)KSTACKEXC)[-9] = kt->r5;
((uint32*)KSTACKEXC)[-10] = kt->r4;
((uint32*)KSTACKEXC)[-11] = kt->r3;
((uint32*)KSTACKEXC)[-12] = kt->r2;
((uint32*)KSTACKEXC)[-13] = kt->r1;
((uint32*)KSTACKEXC)[-14] = kt->r0;
((uint32*)KSTACKEXC)[-15] = kt->cpsr;
/* switch into system mode restore hidden registers then switch back */
asm("mrs r0, cpsr \n\
mov r1, r0 \n\
bic r0, r0, #0x1f \n\
orr r0, r0, #0x1f \n\
msr cpsr, r0 \n\
mov sp, %[sp] \n\
mov lr, %[lr] \n\
msr cpsr, r1 \n\
" : : [sp]"r" (kt->sp), [lr]"r" (kt->lr));
/* set TLB table for user space (it can be zero for kernel) */
kvmm2_getphy(&ks->vmm, (uintptr)ks->cproc->vmm.table, &page);
arm4_tlbset1(page);
/*
Invalidate all unlocked entries...
..according to the manual there may be a better way to invalidate,
only some entries per process. But, for now this should work.
If you do not do this then the TLB does not flush and old entries
from the previous process will still be in the TLB cache.
*/
asm("mcr p15, #0, r0, c8, c7, #0");
//kprintf("SWITCH-TO thread:%x cpsr:%x fp:%x sp:%x pc:%x\n", kt, kt->cpsr, kt->r11, kt->sp, kt->pc);
uint32 *p;
if (!kvmm2_getphy(&ks->cproc->vmm, 0x90000000, (uintptr*)&p)) {
//kprintf("NO STACK EXISTS??\n");
} else {
//kprintf("writing to stack..%x\n", kt->sp);
//((uint32*)kt->sp)[-1] = 0xbb;
}
if (kvmm2_getphy(&ks->cproc->vmm, 0x80000000, (uintptr*)&p)) {
uint32 x;
//kprintf("CODE PAGE :%x\n", p);
p = (uint32*)(0x80000000);
//((uint32*)KSTACKEXC)[-1] = 0x80000800;
//for (x = 0; x < 1024; ++x) {
// p[x] = 0xeafffffe;
//}
} else {
//kprintf("CODE PAGE????\n");
}
}
Here we have the basic save thread state and load thread state blocks. These possibly could be implemented in a much faster way, but I choose to avoid premature optimization for the sake of being straightforward. In the middle between the save and load state blocks you have the code to choose the next thread. In its simplest form it simply grabs the next thread in the current process, and if no thread is left it grabs the next process and the first thread. It continues this until it has a thread to run. In the form above it does the exact same except it checks if the thread is sleeping, then it checks the timeout (minimum time to sleep for) and if it is expired it sets the thread as awake and runs it. It also checks if any signal has been asserted to the thread and if so (and only if) then the thread is woken if it is sleeping. You might be wondering why I remove the wake signal (bit) only if the thread is sleeping. This has to do with my future design of the IPC for the system.
Added Idle Thread And Kernel Thread
I added an idle thread that does nothing but yield, and a kernel work thread. The idle thread gives the scheduler something to switch to when no other threads are running (or at least it is supposed to), but in reality in the current code the scheduler just switches to it and it yields. It needs some improvement, but it is simple for now.
/*
    Put the calling thread to sleep through system call 101 (KSWI_SLEEP).

    timeout     value handed to the kernel in r0; the SWI handler adds it to
                the current kernel time to form the wake-up time

    Returns whatever the kernel left in r0 when the thread was resumed. The
    scheduler stores 0 there when the timeout expired and
    `ks->ctime - timeout` when the thread was woken by a signal.

    r0 is written by the asm and by the kernel, so it is declared as a
    clobber; otherwise the compiler may keep a live value or one of the
    operands in it. The statement is volatile so it is never dropped or
    merged.
*/
int ksleep(uint32 timeout) {
    int result;

    asm volatile(
        "mov r0, %[in]\n"
        "swi #101\n"
        "mov %[result], r0\n"
        : [result]"=r" (result)
        : [in]"r" (timeout)
        : "r0", "memory");

    return result;
}
/*
    Kernel work thread body: sleeps with a timeout of 0xffff and writes a
    '$' to the debug serial output each time it resumes. Never returns.

    th      this thread's own KTHREAD (not used by the loop)
*/
void kthread(KTHREAD *th) {
    while (1) {
        ksleep(0xffff);
        kserdbg_putc('$');
    }
}
/*
    Idle thread body: issues system call 102 (yield) over and over so the
    scheduler always has a thread that is not sleeping. Never returns.
*/
void kidle() {
    while (1) {
        asm("swi #102");
    }
}
The ''kidle'' thread immediately yields. The scheduler could be coded differently so that it only executes ''kidle'' if there are no other threads to run, but I decided to keep it simple at this stage.
Creation Of Kernel And Idle Thread
.... in main ....
/*
    Create one process that hosts both the kernel work thread and the idle
    thread, and link it into the kernel's process list.
    NOTE(review): the kmalloc results are used without a check for failure.
*/
process = (KPROCESS*)kmalloc(sizeof(KPROCESS));
memset(process, 0, sizeof(KPROCESS));
kvmm2_init(&process->vmm);
ll_add((void**)&ks->procs, process);
/* kernel work thread: begins execution at kthread() */
th = (KTHREAD*)kmalloc(sizeof(KTHREAD));
memset(th, 0, sizeof(KTHREAD));
ll_add((void**)&process->threads, th);
th->pc = (uintptr)&kthread;
th->flags = 0;
/*
    System mode with bits 29 and 30 of the status word set (the C and Z
    condition flags on ARM).
    NOTE(review): why the flags are preset is not evident from this code.
*/
th->cpsr = 0x60000000 | ARM4_MODE_SYS;
/* set stack: start 8 bytes below the top of a freshly allocated 2KB block */
th->sp = (uintptr)kmalloc(1024 * 2) + 1024 * 2 - 8;
/* r0 is loaded when the thread first runs, so kthread() receives its own KTHREAD */
th->r0 = (uint32)th;
/* idle thread: begins execution at kidle() and is flagged KTHREAD_KIDLE */
th = (KTHREAD*)kmalloc(sizeof(KTHREAD));
memset(th, 0, sizeof(KTHREAD));
ll_add((void**)&process->threads, th);
th->pc = (uintptr)&kidle;
th->flags = KTHREAD_KIDLE;
th->cpsr = 0x60000000 | ARM4_MODE_SYS;
/* set stack (dont need anything big for idle thread at the moment) */
th->sp = (uintptr)kmalloc(128) + 128 - 8;
th->r0 = (uint32)th;
/* remember the idle thread and its process in the kernel state */
ks->idleth = th;
ks->idleproc = process;
....
Here both the kernel thread and idle thread are created under the same process. You could have created a separate process if you like.
New Exception Handlers
/*
    Common C-level exception handler.

    lr      link register value captured by the exception entry code
    type    one of the ARM4_XRQ_* exception type codes

    IRQ:    when bit 6 of the interrupt controller status is set, the timer
            interrupt is cleared, kernel time is advanced by the timer's
            reload value and the scheduler runs.
    SWI:    the system call number is read from the SWI instruction and
            dispatched; arguments and results travel in the saved r0/r1
            slots of the exception frame below KSTACKEXC.
    other:  treated as a fault in the current thread. The thread is removed
            from its process, the scheduler switches away from it and its
            state is dumped.
*/
void k_exphandler(uint32 lr, uint32 type) {
    /* device registers: volatile so every access really reaches the device */
    volatile uint32     *t0mmio;
    volatile uint32     *picmmio;
    uint32              swi;
    KSTATE              *ks;
    uint32              r0, r1;
    KPROCESS            *proc;
    KTHREAD             *th;

    ks = (KSTATE*)KSTATEADDR;

    if (type == ARM4_XRQ_IRQ) {
        picmmio = (volatile uint32*)0x14000000;
        /*
            It is possible that other pins are activated so we just check
            this one bit (1 << 6).
        */
        if (picmmio[PIC_IRQ_STATUS] & (1 << 6)) {
            t0mmio = (volatile uint32*)0x13000100;
            /*
                Clear the interrupt in the timer so it lowers its INT line.
                If it is not cleared, an interrupt is raised again
                immediately upon return from this one. According to the docs
                any value can be written.
            */
            t0mmio[REG_INTCLR] = 1;
            ks->ctime += t0mmio[REG_BGLOAD];
            ksched();
            /* go back through normal interrupt return process */
            return;
        }
    }

    if (type == ARM4_XRQ_SWINT) {
        /*
            Get SWI argument (index): the SWI instruction sits 4 bytes
            before lr and the low 16 bits of it are used as the call number.
        */
        swi = ((uint32*)((uintptr)lr - 4))[0] & 0xffff;

        switch (swi) {
            case KSWI_WAKEUP:
                /* wake up thread function: r0 = process, r1 = thread */
                r0 = ((uint32*)KSTACKEXC)[-14];
                r1 = ((uint32*)KSTACKEXC)[-13];
                /* the pair is only honoured if it is found in the kernel's lists */
                for (proc = ks->procs; proc; proc = proc->next) {
                    if ((uint32)proc == r0) {
                        for (th = proc->threads; th; th = th->next) {
                            if ((uint32)th == r1) {
                                /* wake up thread */
                                th->flags |= KTHREAD_WAKEUP;
                            }
                        }
                    }
                }
                break;
            case KSWI_GETTICKPERSECOND:
                /* result goes back to the caller in its saved r0 */
                ((uint32*)KSTACKEXC)[-14] = ks->tpers;
                break;
            case KSWI_SLEEP:
                /* thread sleep function: r0 = ticks to sleep from now */
                r0 = ((uint32*)KSTACKEXC)[-14];
                if (ks->cthread) {
                    ks->cthread->flags |= KTHREAD_SLEEPING;
                    ks->cthread->timeout = r0 + ks->ctime;
                }
                ksched();
                break;
            case KSWI_YEILD:
                ksched();
                break;
            default:
                break;
        }
        return;
    }

    if (type != ARM4_XRQ_IRQ && type != ARM4_XRQ_FIQ && type != ARM4_XRQ_SWINT) {
        /*
            Ensure, the exception return code is correctly handling LR with the
            correct offset. I am using the same return for everything except SWI,
            which requires that LR not be offset before return.
        */
        KTHREAD         *tmp;

        tmp = ks->cthread;

        kprintf("!EXCEPTION\n");
        kprintf("type:%x cproc:%x cthread:%x lr:%x\n", type, ks->cproc, ks->cthread, lr);

        if (!ks->cproc || !tmp) {
            /* fault with no thread to blame: nothing to remove or resume, so halt */
            for (;;) {
            }
        }

        ll_rem((void**)&ks->cproc->threads, tmp);
        /*
            ks->cthread is deliberately left on the faulting thread. The
            scheduler stores the exception frame into the current thread and
            then steps to its `next`, which ll_rem leaves intact. Advancing
            here first would write the faulting thread's registers into the
            following thread and skip that thread as well.
        */
        ksched();
        kdumpthreadinfo(tmp);
    }

    return;
}
The biggest difference here is the addition of system calls and the termination of a thread when it has an exception.
Added memset
I added a useful utility function memset.
/*
    Fill `sz` bytes starting at `p` with the byte value `v`.

    Note that, unlike the standard C library function of the same name, this
    version returns nothing.
*/
void memset(void *p, uint8 v, uintptr sz) {
    uint8 *cur;
    uint8 *end;

    cur = (uint8*)p;
    end = cur + sz;

    while (cur != end) {
        *cur = v;
        ++cur;
    }
}
Changes In kelfload
/*
    Load an ELF32 ARM image into a process and create its first thread.

    proc    process that receives the image; its virtual memory map is
            initialised here if it has no table yet
    addr    address of the ELF image in memory
    sz      size of the image
            NOTE(review): sz is not used yet; section offsets and sizes are
            trusted as found in the image.

    A user mode thread is created with its pc at the ELF entry point, its
    stack at the top of one page mapped at 0x90000000 and r0 holding
    0xa0000000, where the serial output registers are mapped. Every section
    with a non-zero load address is then allocated in the process and copied
    byte by byte while the process's table is temporarily installed.

    Returns 1 on success and 0 if the image is rejected.
*/
int kelfload(KPROCESS *proc, uintptr addr, uintptr sz) {
    ELF32_EHDR      *ehdr;
    ELF32_SHDR      *shdr;
    uint32          x, y;
    uintptr         page, oldpage;
    KSTATE          *ks;
    uint8           *fb;
    KTHREAD         *th;

    (void)sz;

    kprintf("loading elf into memory space\n");

    ks = (KSTATE*)KSTATEADDR;

    ehdr = (ELF32_EHDR*)addr;
    if (ehdr->e_machine != EM_ARM) {
        kprintf("kelfload: not ARM machine!\n");
        return 0;
    }

    if (ehdr->e_ident[4] != 0x1) {
        kprintf("kelfload: not ELF32 object\n");
        return 0;
    }

    if (!proc->vmm.table) {
        kvmm2_init(&proc->vmm);
    }

    th = (KTHREAD*)kmalloc(sizeof(KTHREAD));
    memset(th, 0, sizeof(KTHREAD));
    ll_add((void**)&proc->threads, th);

    th->pc = ehdr->e_entry;
    th->flags = 0;
    th->cpsr = 0x60000000 | ARM4_MODE_USER;
    /* set stack: top of the page mapped at 0x90000000 below */
    th->sp = 0x90001000;
    /* pass address of serial output as first argument */
    th->r0 = 0xa0000000;

    /* map serial output mmio */
    kvmm2_mapsingle(&proc->vmm, 0xa0000000, 0x16000000, TLB_C_AP_FULLACCESS);
    /* map stack page (4K) */
    kvmm2_allocregionat(&proc->vmm, 1, 0x90000000, TLB_C_AP_FULLACCESS);

    /* map address space so we can work directly with it */
    kvmm2_getphy(&ks->vmm, (uintptr)proc->vmm.table, &page);
    oldpage = arm4_tlbget1();
    arm4_tlbset1(page);
    /* flush the TLB; the register written is an explicit zero, not stale r0 */
    asm volatile("mcr p15, #0, %[zero], c8, c7, #0" : : [zero]"r" (0) : "memory");

    // e_shoff - section table offset
    // e_shentsize - size of each section entry
    // e_shnum - count of entries in table
    for (x = 0; x < ehdr->e_shnum; ++x) {
        shdr = (ELF32_SHDR*)(addr + ehdr->e_shoff + x * ehdr->e_shentsize);
        if (shdr->sh_addr != 0) {
            /* load this into memory */
            // sh_offset - byte offset in module
            // sh_size - size of section in module
            // sh_addr - address to load at
            kvmm2_allocregionat(&proc->vmm, kvmm2_rndup(shdr->sh_size), shdr->sh_addr, TLB_C_AP_FULLACCESS);
            fb = (uint8*)(addr + shdr->sh_offset);
            /* copy */
            for (y = 0; y < shdr->sh_size; ++y) {
                ((uint8*)shdr->sh_addr)[y] = fb[y];
            }
        }
    }

    /* restore previous address space */
    arm4_tlbset1(oldpage);
    /* flush the TLB */
    asm volatile("mcr p15, #0, %[zero], c8, c7, #0" : : [zero]"r" (0) : "memory");

    /* the function is declared int and the failure paths return 0 */
    return 1;
}
Here we simply allocate memory for the sections and then copy them into memory. I switch address spaces and flush the TLB to make this easier to perform. It could be faster just to map the memory into kernel space and copy to it in certain situations, but I decided that this method was the simplest.
Changed To Creation Of Process From Modules
....
/* module type value that marks an attached module as a user space ELF image */
#define KMODTYPE_ELFUSER 1
/*
create a task for any attached modules of the correct type
*/
kprintf("looking at attached modules\n");
/* walk every attached module; modules of other types are ignored */
for (m = kPkgGetFirstMod(); m; m = kPkgGetNextMod(m)) {
kprintf("looking at module\n");
if (m->type == KMODTYPE_ELFUSER) {
/* create new process */
/* zeroed so that kelfload sees no vmm table yet and initialises one */
process = (KPROCESS*)kmalloc(sizeof(KPROCESS));
memset(process, 0, sizeof(KPROCESS));
ll_add((void**)&ks->procs, process);
/* will create thread in process */
/* NOTE(review): the result of kelfload is ignored, so a rejected image leaves a process with no threads in the list */
kelfload(process, (uintptr)&m->slot[0], m->size);
}
}
.....
I simply changed from creating of a task to the creation of a process, and kelfload was changed to create a thread of the passed process instead of a task.
SWI Exception Entry And Exit Changed
These were changed to push SPSR so it can be saved and restored easily from inside our new ksched() function.
/*
    SWI exception entry sequence.

    Declares a local `lr`, points sp at KSTACKEXC and then pushes lr, r0-r12
    and finally the saved status register (SPSR, read through r0 after r0
    itself has been pushed). This produces the frame that ksched() and the
    SWI handler index relative to KSTACKEXC: [-1] return address, [-2]..[-14]
    r12..r0, [-15] status register. The last statement copies the link
    register into the local `lr`.

    The statement order is the frame layout; do not reorder.
*/
#define KEXP_TOPSWI \
uint32 lr; \
asm("mov sp, %[ps]" : : [ps]"i" (KSTACKEXC)); \
asm("push {lr}"); \
asm("push {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12}"); \
asm("mrs r0, spsr"); \
asm("push {r0}"); \
asm("mov %[ps], lr" : [ps]"=r" (lr));
/*
    SWI exception exit sequence, the mirror image of KEXP_TOPSWI.

    Pops the status word back into SPSR, restores r0-r12 and then loads pc
    from the frame using the `^` form of LDM, which on ARM also restores CPSR
    from SPSR. The return address is used as stored, without an offset.
    The macro has no trailing semicolon; the user supplies it.
*/
#define KEXP_BOTSWI \
asm("pop {r0}"); \
asm("msr spsr, r0"); \
asm("pop {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12}"); \
asm("LDM sp!, {pc}^")
The SWI entry and exit assembly now saves everything the same way as the other exception handlers allowing the scheduler to be called to save the current thread and load the next meaning system calls can switch threads for sleep and yield.
Added System Call Numbers
/*
    System call numbers, matched against the immediate of the SWI
    instruction in k_exphandler. The callers in this file encode the numbers
    directly in their `swi #NNN` instructions, so the values must stay in
    sync with those.
*/
/* set KTHREAD_WAKEUP on a thread; r0 = process, r1 = thread */
#define KSWI_WAKEUP 100
/* put the calling thread to sleep; r0 = number of ticks */
#define KSWI_SLEEP 101
/* give up the CPU to the next thread (the spelling is part of the name) */
#define KSWI_YEILD 102
/* return the kernel's ticks-per-second value in r0 */
#define KSWI_GETTICKPERSECOND 103
New KPROCESS And Updated KTHREAD
/*
    One thread of execution.

    `next` and `prev` come first so the structure can be cast to LL and
    handled by ll_add/ll_rem.
*/
typedef struct _KTHREAD {
struct _KTHREAD *next;
struct _KTHREAD *prev;
/* kernel time (ks->ctime scale) after which a sleeping thread is woken; set by the sleep system call */
uint64 timeout; /* when to wakeup */
/* KTHREAD_* bits such as KTHREAD_SLEEPING, KTHREAD_WAKEUP and KTHREAD_KIDLE */
uint8 flags;
/* register state saved and restored by the scheduler on every switch */
uint32 r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, sp, lr, cpsr, pc;
} KTHREAD;
/*
    One process: an address space plus the threads that run in it.

    `next` and `prev` come first so the structure can be cast to LL.
*/
typedef struct _KPROCESS {
struct _KPROCESS *next;
struct _KPROCESS *prev;
/* virtual memory map shared by all threads of the process */
KVMMTABLE vmm;
/* head of the linked list of this process's threads (0 when empty) */
KTHREAD *threads;
} KPROCESS;
The KTHREAD was updated with linked list fields added, timeout, and flags. The KPROCESS is new and was added to provide a process like structure to tasking. The vmm field was moved from the thread structure to the process structure since all threads share the same address space.
Changes To KSTATE
typedef struct _KSTATE {
- /* process/thread support */
- KTHREAD threads[0x10];
- uint8 threadndx;
- uint8 iswitch;
-
/* new process/thread support */
+ KPROCESS *procs;
+ KPROCESS *cproc;
+ KTHREAD *cthread;
/* physical and heap memory management */
KHEAPBM hphy; /* kernel physical page heap */
KHEAPBM hchk; /* data chunk heap */
+ /* time management */
+ uint64 ctime;
+ uint32 tpers; /* ticks per second */
+
/* virtual memory management */
KVMMTABLE vmm; /* kernel virtual memory map */
uint32 vmm_ucte; /* unused coarse table entries */
The old thread/task structures were removed and the new fields added. A pointer to the first process entry (linked list) was added, along with pointers to the current process and thread.
Second Module Added
/*
    Entry point of the second module.

    smmio   address passed by the kernel in r0 (the serial output registers)

    The function spins forever in its first loop. The loop after it, which
    would write 'A' to the serial output after a busy-wait delay, is never
    reached.
*/
int _start(unsigned int *smmio) {
    int delay;

    /* park here; nothing below this loop executes */
    while (1) {
    }

    while (1) {
        delay = 0;
        while (delay < 0xfffff) {
            ++delay;
        }
        smmio[0] = 'A';
    }

    return 0;
}
First Module
This module demonstrates going to sleep.
unsigned int getTicksPerSecond() {
unsigned int out;
asm(" swi #103 \n\
mov %[out], r0 \n\
" : [out]"=r" (out));
return out;
}
void sleep(unsigned int timeout) {
asm(" mov r0, %[in] \n\
swi #101 \n\
" : : [in]"r" (timeout));
}
/*
    Entry point of the first module: writes 'G' then 'K' to the serial
    output and sleeps for one second's worth of ticks, forever.

    smmio   address passed by the kernel in r0 (the serial output registers)
*/
int _start(unsigned int *smmio) {
    /*
        Device register: accessed through a volatile pointer so that both
        stores below are performed. Through a plain pointer the compiler may
        treat the first store as dead and drop it.
    */
    volatile unsigned int *out = smmio;
    unsigned int tps;

    tps = getTicksPerSecond();

    for (;;) {
        out[0] = 'G';
        out[0] = 'K';
        sleep(tps);
    }

    return 0;
}
This module demonstrates going to sleep instead of busy looping which wastes CPU that could be used for another thread.
Change In Timer Initialization
/* timer register block; the same address is acknowledged in k_exphandler */
t0mmio = (uint32*)0x13000100;
/* the load and background load values set the interrupt period to KTASKTICKS timer ticks */
t0mmio[REG_LOAD] = KTASKTICKS;
t0mmio[REG_BGLOAD] = KTASKTICKS;
/* going by the flag names: enabled, periodic mode, 32-bit counter, no clock divider, interrupt enabled */
t0mmio[REG_CTRL] = CTRL_ENABLE | CTRL_MODE_PERIODIC | CTRL_SIZE_32 | CTRL_DIV_NONE | CTRL_INT_ENABLE;
t0mmio[REG_INTCLR] = ~0; /* make sure interrupt is clear (might not be mandatory) */
/* value returned by the KSWI_GETTICKPERSECOND system call; NOTE(review): assumes the timer counts at 1 MHz, confirm for the target */
ks->tpers = 1000000;
We now use KTASKTICKS and initialize ks->tpers, which is read by threads using a software interrupt (see exphandler). This allows them to adjust their ticks for the sleep call to sleep for a certain amount of actual real time (see exphandler SWI handler).
I also switched to using a 1MHZ timer just because I have not yet figured out how to detect the system clock speed. I have a few ideas but I am currently working on more important things, but it might be a good exercise for the reader to try to do this. The board may allow you to determine the system clock speed. Also, an idea is to use the 1MHZ timer to estimate the system clock synchronized timer 0. This might yield a different value on different systems depending on how the system clock timer is implemented in QEMU, but on real hardware it should always be the same.