Porting Rust standard library

From OSDev Wiki
Jump to navigation Jump to search
Difficulty level
Difficulty 1.png
Beginner

This page is under construction! This page or section is a work in progress and may thus be incomplete. Its content may be changed in the near future.

Guide

This guide shows how to get the standard library to compile and run for a custom target.

By the end you should be able to compile a project with

cargo +dev-x86_64-unknown-myos r --target x86_64-unknown-myos

Get sources

git clone https://github.com/rust-lang/rust

Configuration

# config.toml
profile = "compiler"
change-id = 125535

[build]
host = ["x86_64-unknown-linux-gnu"]
target = ["x86_64-unknown-linux-gnu", "x86_64-unknown-myos"]

[rust]
incremental = true

Adding the target

Adding a target to the rust compiler takes several files. rustc_target must be told about the new target with a base spec and a target spec. The bootstrap crate must be told that the new target, while not in the downloaded bootstrap compiler, is valid. Also, a test is added for the new target that checks assembly code generation. The test is not strictly necessary, but tidy, the rust compiler's style enforcer, will not pass without it, so it is good practice.

In addition, the rust compiler must be given a path to a valid crt0 object file. Rustc does not supply a _start symbol of its own, and the default linker doesn't provide one either, so we must provide our own. (TODO: Figure out self-contained linking of the crt0).

// compiler/rustc_target/src/spec/base/myos.rs
use crate::spec::crt_objects;
use crate::spec::{Cc, LinkerFlavor, Lld, RelocModel, StackProbeType, TargetOptions};

/// Base target options shared by every `myos` target.
///
/// Sets the OS name to `"myos"`, selects `rust-lld` as the linker, uses inline
/// stack probes and a static relocation model, and wires in the crt objects
/// returned by `crt_objects::pre_myos()` / `crt_objects::post_myos()`. Every
/// option not listed here keeps its `TargetOptions::default()` value.
pub fn opts() -> TargetOptions {
    TargetOptions {
        // This is the value `cfg(target_os = "...")` is matched against.
        os: "myos".into(),
        // Link with rust-lld, described as a GNU-flavoured LLD that is not
        // driven through a C compiler (`Cc::No`).
        linker: Some("rust-lld".into()),
        linker_flavor: LinkerFlavor::Gnu(Cc::No, Lld::Yes),
        stack_probes: StackProbeType::Inline,
        // The pre-link objects supply the crt0 (and with it `_start`); no
        // objects are appended after the link inputs.
        relocation_model: RelocModel::Static,
        pre_link_objects: crt_objects::pre_myos(),
        post_link_objects: crt_objects::post_myos(),
        ..Default::default()
    }
}


// compiler/rustc_target/src/spec/targets/x86_64_unknown_myos.rs
use crate::spec::{base, PanicStrategy, Target, TargetMetadata};

/// Builds the target specification for `x86_64-unknown-myos`.
///
/// The shared myos base options are taken as a starting point and then
/// specialised for x86_64 before being packaged into a [`Target`].
pub fn target() -> Target {
    // No descriptive metadata is recorded for this target.
    let metadata = TargetMetadata { description: None, tier: None, host_tools: None, std: None };

    // x86_64-specific overrides on top of the common myos options.
    let mut options = base::myos::opts();
    options.panic_strategy = PanicStrategy::Abort;
    options.disable_redzone = true;
    // MMX and SSE are switched off and soft-float is switched on.
    options.features = "-mmx,-sse,+soft-float".into();
    options.cpu = "x86-64".into();

    Target {
        arch: "x86_64".into(),
        llvm_target: "x86_64-unknown-none".into(),
        data_layout:
            "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128".into(),
        pointer_width: 64,
        metadata,
        options,
    }
}


--- a/compiler/rustc_target/src/spec/base/mod.rs
+++ b/compiler/rustc_target/src/spec/base/mod.rs
@@ -18,6 +18,7 @@
 pub(crate) mod linux_ohos;
 pub(crate) mod linux_uclibc;
 pub(crate) mod msvc;
+pub(crate) mod myos;
 pub(crate) mod netbsd;
 pub(crate) mod nto_qnx;
 pub(crate) mod openbsd;


--- a/compiler/rustc_target/src/spec/mod.rs
+++ b/compiler/rustc_target/src/spec/mod.rs
@@ -1826,6 +1826,8 @@ fn $module() {
 
     ("x86_64-unknown-linux-none", x86_64_unknown_linux_none),
 
+    ("x86_64-unknown-myos", x86_64_unknown_myos),
+
 }


--- a/compiler/rustc_target/src/spec/crt_objects.rs
+++ b/compiler/rustc_target/src/spec/crt_objects.rs
@@ -106,6 +106,14 @@ pub(super) fn post_mingw() -> CrtObjects {
     all("rsend.o")
 }
 
+pub(super) fn pre_myos() -> CrtObjects {
+    all(<PATH TO CRT0 HERE>)
+}
+
+pub(super) fn post_myos() -> CrtObjects {
+    new(&[])
+}
+


--- a/src/bootstrap/src/core/sanity.rs
+++ b/src/bootstrap/src/core/sanity.rs
@@ -40,6 +40,7 @@ pub struct Finder {
 #[cfg(not(feature = "bootstrap-self-test"))]
 const STAGE0_MISSING_TARGETS: &[&str] = &[
     // just a dummy comment so the list doesn't get onelined
+    "x86_64-unknown-myos",
 ];


--- a/src/bootstrap/src/lib.rs
+++ b/src/bootstrap/src/lib.rs
@@ -94,7 +94,7 @@
     (Some(Mode::Std), "no_sync", None),
     /* Extra values not defined in the built-in targets yet, but used in std */
     (Some(Mode::Std), "target_env", Some(&["libnx", "p2"])),
-    (Some(Mode::Std), "target_os", Some(&["visionos"])),
+    (Some(Mode::Std), "target_os", Some(&["myos", "visionos"])),
     (Some(Mode::Std), "target_arch", Some(&["arm64ec", "spirv", "nvptx", "xtensa"])),
     (Some(Mode::ToolStd), "target_os", Some(&["visionos"])),


--- a/tests/assembly/targets/targets-elf.rs
+++ b/tests/assembly/targets/targets-elf.rs
@@ -561,6 +561,9 @@
 //@ revisions: x86_64_unknown_linux_none
 //@ [x86_64_unknown_linux_none] compile-flags: --target x86_64-unknown-linux-none
 //@ [x86_64_unknown_linux_none] needs-llvm-components: x86
+//@ revisions: x86_64_unknown_myos
+//@ [x86_64_unknown_myos] compile-flags: --target x86_64-unknown-myos
+//@ [x86_64_unknown_myos] needs-llvm-components: x86
 //@ revisions: x86_64_unknown_netbsd
 //@ [x86_64_unknown_netbsd] compile-flags: --target x86_64-unknown-netbsd
 //@ [x86_64_unknown_netbsd] needs-llvm-components: x86

Adapt library/std

In addition to rustc, std must also be modified to support the target. By default, std will error on build if the OS isn't explicitly supported, so we must add our OS to the list of supported OSes. In addition, we must provide a PAL (Platform Abstraction Layer) to tell std how to interact with our OS.

--- a/library/std/build.rs
+++ b/library/std/build.rs
@@ -48,6 +48,7 @@ fn main() {
         || target_os == "uefi"
         || target_os == "teeos"
         || target_os == "zkvm"
+        || target_os == "myos"
 
         // See src/bootstrap/src/core/build_steps/synthetic_targets.rs
         || env::var("RUSTC_BOOTSTRAP_SYNTHETIC_TARGET").is_ok()

Copy library/std/src/sys/pal/unsupported to library/std/src/sys/pal/myos.

--- a/library/std/src/sys/pal/mod.rs
+++ b/library/std/src/sys/pal/mod.rs
@@ -61,6 +61,9 @@
     } else if #[cfg(target_os = "zkvm")] {
         mod zkvm;
         pub use self::zkvm::*;
+    } else if #[cfg(target_os = "myos")] {
+        mod myos;
+        pub use self::myos::*;
     } else {
         mod unsupported;
         pub use self::unsupported::*;

Add toolchain

rustup toolchain link dev-x86_64-unknown-myos ~/Documents/rust/rust/build/x86_64-unknown-linux-gnu/stage2

Making the standard library functional

Even though both rustc and std know about the target, programs compiled using the toolchain will crash immediately with a stack overflow. This is because the standard library requires two things from the OS to be able to initialize: a memory allocator and thread local storage (TLS).

Memory allocator

Implementing a memory allocator is done in the PAL's alloc.rs file and requires implementing the GlobalAlloc trait. The following bump allocator will work to let the stdlib initialize.

// library/std/src/sys/pal/myos/alloc.rs
use crate::{
    alloc::{GlobalAlloc, Layout, System},
    ptr,
    sync::atomic::{AtomicUsize, Ordering},
};

/// Size in bytes of the static heap that backs the bump allocator.
const HEAP_SIZE: usize = 65536;
/// Alignment of `HEAP_DATA`; must match the `repr(align(..))` on `HeapData`.
const HEAP_ALIGN: usize = 4096;

/// Backing storage for the heap.
#[repr(align(4096))]
struct HeapData([u8; HEAP_SIZE]);

static mut HEAP_DATA: HeapData = HeapData([0; HEAP_SIZE]);
/// Byte offset into `HEAP_DATA` of the first byte that has not been handed out.
static HEAP_USED: AtomicUsize = AtomicUsize::new(0);

/// Minimal bump allocator: memory is carved off the front of a fixed static
/// heap and never reused. It exists only to let the standard library
/// initialize; replace it with a real allocator for anything beyond that.
#[stable(feature = "alloc_system_type", since = "1.28.0")]
unsafe impl GlobalAlloc for System {
    /// Reserves `layout.size()` bytes aligned to `layout.align()`.
    ///
    /// Returns a null pointer when the requested alignment exceeds the heap's
    /// own alignment or when the heap does not have enough space left.
    #[inline]
    unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
        // The heap base is only `HEAP_ALIGN`-aligned, so aligning the offset
        // cannot satisfy a stricter request.
        if layout.align() > HEAP_ALIGN {
            return ptr::null_mut();
        }

        // `Layout` guarantees the alignment is a non-zero power of two.
        let align_mask = layout.align() - 1;

        // Reserve the range in one atomic step so that concurrent callers can
        // never be handed overlapping memory. The closure yields `None` (and
        // leaves the counter untouched) on overflow or exhaustion.
        let reserved = HEAP_USED.fetch_update(Ordering::Relaxed, Ordering::Relaxed, |used| {
            let start = used.checked_add(align_mask)? & !align_mask;
            let end = start.checked_add(layout.size())?;
            (end <= HEAP_SIZE).then_some(end)
        });

        match reserved {
            Ok(previous_used) => {
                // Same rounding as in the closure; it cannot overflow because
                // the update succeeded for this exact value.
                let start = (previous_used + align_mask) & !align_mask;
                // SAFETY: the successful update proved that
                // `start + layout.size() <= HEAP_SIZE`, so the offset stays
                // inside `HEAP_DATA`, and the reserved range belongs
                // exclusively to this call.
                unsafe { ptr::addr_of_mut!(HEAP_DATA).cast::<u8>().add(start) }
            }
            Err(_) => ptr::null_mut(),
        }
    }

    /// Does nothing: a bump allocator never reclaims memory.
    #[inline]
    unsafe fn dealloc(&self, _ptr: *mut u8, _layout: Layout) {}
}

Thread local storage

Global statics (easiest, single-threaded only)

WARNING: This will NOT work on multi-threaded systems.

If your OS is single-threaded or you don't want to implement full TLS before adding multithreading to your PAL, Rust can implement TLS via global statics.

--- a/library/std/src/sys/thread_local/mod.rs
+++ b/library/std/src/sys/thread_local/mod.rs
@@ -7,7 +7,7 @@
 // "static" is for single-threaded platforms where a global static is sufficient.
 
 cfg_if::cfg_if! {
-    if #[cfg(any(all(target_family = "wasm", not(target_feature = "atomics")), target_os = "uefi"))] {
+    if #[cfg(any(all(target_family = "wasm", not(target_feature = "atomics")), target_os = "uefi", target_os = "myos"))] {
         #[doc(hidden)]
         mod static_local;
         #[doc(hidden)]

OS APIs (slow, easier than ELF native)

This option is slower than the other multi-thread-compatible option (native ELF TLS), but is easier to implement: it only requires implementing the functions in the PAL's thread_local_key.rs file. The key is a value unique to each TLS variable, but shared among threads. This is rustc's default method of doing TLS.

Native ELF TLS (fastest)

This option is the fastest way of doing TLS, but requires more complex support from the OS. To implement it, see the linked wiki article on TLS. Enabling it in rustc is done by adding a target option for your OS.

--- a/compiler/rustc_target/src/spec/base/myos.rs
+++ b/compiler/rustc_target/src/spec/base/myos.rs
@@ -10,6 +10,7 @@ pub fn opts() -> TargetOptions {
         relocation_model: RelocModel::Static,
         pre_link_objects: crt_objects::pre_myos(),
         post_link_objects: crt_objects::post_myos(),
+        has_thread_local: true,
         ..Default::default()
     }
 }

Basic printing

With memory allocation and TLS in place, the Rust stdlib is now functional enough to not crash. However, it's fairly useless as no IO functions work. Adding print output is a natural next step, and will also allow easier debugging of panics.

Adding print support is fairly simple given the architecture of the PAL, and merely requires filling out the Write implementations of stdout/err in stdio.rs. In addition, the panic_output function should be set to the desired output stream for panic messages, usually stderr.

Example code is provided below. The given code assumes syscalls are done with interrupt 0x80 with the number in rax and the first parameter in rcx, and syscall 0 takes a byte to write to some text output device. You will probably need to change the syscall number and parameters for your OS, but this should be a basic starting point.

--- a/library/std/src/sys/pal/myos/stdio.rs
+++ b/library/std/src/sys/pal/myos/stdio.rs
@@ -1,3 +1,4 @@
+use crate::arch::asm;
 use crate::io;
 
 pub struct Stdin;
@@ -24,6 +25,11 @@ pub const fn new() -> Stdout {
 
 impl io::Write for Stdout {
     fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
+        for byte in buf {
+            unsafe {
+                asm!("int 0x80", in("rax") 0, in ("rcx") *byte as u64);
+            };
+        }
         Ok(buf.len())
     }
 
@@ -40,6 +46,11 @@ pub const fn new() -> Stderr {
 
 impl io::Write for Stderr {
     fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
+        for byte in buf {
+            unsafe {
+                asm!("int 0x80", in("rax") 0, in ("rcx") *byte as u64);
+            };
+        }
         Ok(buf.len())
     }
 
@@ -54,6 +65,6 @@ pub fn is_ebadf(_err: &io::Error) -> bool {
     true
 }
 
-pub fn panic_output() -> Option<Vec<u8>> {
-    None
+pub fn panic_output() -> Option<impl io::Write> {
+    Some(Stderr::new())
 }

Now you should be able to compile and run the default Rust hello world program for your target and see it print to screen.

Runtime

Integrating a crate

If you use a crate for the runtime (e.g. myos_rt), you can add it as a dependency to the standard library:

[target.'cfg(target_os = "myos")'.dependencies]
myos_rt = { version = "*", features = ["rustc-dep-of-std"] }

The rustc-dep-of-std feature is necessary since libcore et al. don't exist yet.

Modify the Cargo.toml of your runtime library to include the feature and propagate it up, e.g.:

[dependencies]
cfg-if = "1.0"
core = { version = "1.0.0", optional = true, package = "rustc-std-workspace-core" }
compiler_builtins = { version = "0.1", optional = true }

[features]
rustc-dep-of-std = [
	"core",
	"compiler_builtins",
	"cfg-if/rustc-dep-of-std"
]

Do keep in mind that the same crate with different feature flags is seen as a different crate by the compiler. This means that if you have any globals in the runtime crate and have a project that uses both stdlib and your runtime crate, there will be two separate sets of those globals. One way to work around this is by giving these globals an explicit name with #[export_name = "__rt_whatever"] and weakly linking them with #[linkage = "weak"].

Troubleshooting

error[E0463]: can't find crate for `compiler_builtins`

Add compiler_builtins as a dependency for the crates you use in stdlib, e.g.:

[dependencies]
core = { version = "1.0.0", optional = true, package = "rustc-std-workspace-core" }
compiler_builtins = { version = "*", optional = true } # <--

[features]
rustc-dep-of-std = [
	"core",
	"compiler_builtins/rustc-dep-of-std", # <--
]