From 7a84642235a35d04c6ed6dc4bc22b61e7c987077 Mon Sep 17 00:00:00 2001 From: Federico Angelilli Date: Sun, 2 Mar 2025 02:14:58 +0100 Subject: Update draft --- content/posts/pointer-tagging/highbits.c | 14 +-- content/posts/pointer-tagging/index.md | 186 +++++++++++++++++++++++-------- content/posts/pointer-tagging/test.c | 2 + themes/chilldark | 2 +- 4 files changed, 147 insertions(+), 57 deletions(-) diff --git a/content/posts/pointer-tagging/highbits.c b/content/posts/pointer-tagging/highbits.c index 6d1e19c..5ca5f29 100644 --- a/content/posts/pointer-tagging/highbits.c +++ b/content/posts/pointer-tagging/highbits.c @@ -10,11 +10,11 @@ typedef struct { // Only the integer tag sets the highest bit typedef enum { - TAG_OBJECT = 0, // 0b000000 - TAG_INTEGER = 0x20, // 0b100000 - TAG_FLOAT = 2, // 0b000010 - TAG_STRING = 3, // 0b000011 - TAG_TINYSTR = 4, // 0b000100 + TAG_OBJECT = 0, // 0b_000000 + TAG_INTEGER = 0x40, // 0b1000000 + TAG_FLOAT = 2, // 0b_000010 + TAG_STRING = 3, // 0b_000011 + TAG_TINYSTR = 4, // 0b_000100 } value_tag_t; typedef uintptr_t value_t; @@ -34,9 +34,9 @@ typedef uintptr_t value_t; #define VALUE_TO_OBJECT(val) (void *)VALUE_UNSET_TAG(val) // Integer value -#define INTEGER_SHIFT (VALUE_BITS - 1 - 1) +#define INTEGER_SHIFT (VALUE_BITS - 1) #define INTEGER_MASK ((value_t)1 << INTEGER_SHIFT) -#define INTEGER_HIGH_MASK ((value_t)7 << (INTEGER_SHIFT - 1)) +#define INTEGER_HIGH_MASK ((value_t)3 << (INTEGER_SHIFT - 1)) #define INTEGER_SIGN_BIT ((value_t)1 << (INTEGER_SHIFT - 1)) #define INTEGER_MAX (INTEGER_SIGN_BIT - 1) diff --git a/content/posts/pointer-tagging/index.md b/content/posts/pointer-tagging/index.md index 25d61ff..3794ade 100644 --- a/content/posts/pointer-tagging/index.md +++ b/content/posts/pointer-tagging/index.md @@ -6,14 +6,18 @@ tags = [ "low-level", "memory", "optimization", "langdev", "interpreter" ] draft = true +++ -Let's analyze and implement **tagged pointers**, a clever optimization +Let's explore **tagged pointers**, a clever optimization that can reduce memory footprint and boost performance. -They have a long history[^tagarch], but have still many uses to this day. -I will give a general explanation and then focus on their applications in -language design. +Despite having a widespread usage and long history[^tagarch], +pointer tagging remains a relatively obscure topic. +That's because most of its applications are very low level, +such as operating systems and programming language interpreters. + +We will look behind the scenes, analyzing and implementing them, +with a focus on applications in language development. ## Preliminary theory @@ -25,8 +29,6 @@ In principle, all of these addresses are valid and potentially used. While this could be the case for older or embedded systems, modern ones won't allocate certain ranges. Consequently, some of the bits in a pointer will be unused[^addressing]. -[^addressing]: They might still be used in certain circumstances. For example, Linux marks kernel addresses with a 1 in the MSB. - First, let's see the behaviour of 32-bit architectures (e.g., x86, Arm32). They use *32-bit words* (hence their name), and can address up to 4 GB (4 × 1024³ bytes) of memory[^pae]. Since these systems can use all of the 4 gigs, are the bits in the addresses fully utilized? Well, not necessarily. @@ -66,7 +68,7 @@ Canonical addresses on x86_64 must have bits 63 through 48 set to zeros or ones The Linux kernel uses the bit 47 to distinguish between user and kernel address space[^mmap]. This means that in user space, bits 63:47 will always be zero. -Linux 4.14 added support for 5-level paging, which allows *57-bit virtual addresses*. +Linux 4.14 added support for 5-level paging, which allows *57-bit virtual addresses*[^linux5page]. Everything said above applies, but for bit 56 instead of 47. By default, addresses above 47-bit will not be allocated for user space[^page5]. @@ -155,44 +157,93 @@ By storing some values directly in the pointer, we can avoid reading memory in t This diminishes the probability of cache misses, which are quite expensive. Also, if the memory in the language is managed, eliminating a whole class of tiny -allocations will remove some of the pressure on the garbage collector. +allocations will lessen the pressure on the garbage collector. -## Technique comparison +## A word of caution -We will see in details three techniques: tagged union, low bits tagging, and high bits tagging. +Tagged pointers are a very powerful optimization, used by many critical systems and applications. +However, it is in no way a silver bullet and should be used with caution. -### Tagged union +Firstly, they work only in certain environments and are intrinsically platform-dependent. +It's important to thoroughly check the architecture, ABI, and operating system of your target. -### Low bits tagging +If you need to target multiple platforms, a common approach is to support a generic +value type implementation, for example, with the tagged union. +Then, with conditional compilation, you can enable the tagged pointer representation on specific targets. -### High bits tagging +```c +#if PTR_TAG_SUPPORT +#include "lowbits.h" +#else +#include "union.h" +#endif +``` -### Alternative approaches +The memory alignment is vital when you are using low bits tagging and must be guaranteed. +Tagging the pointers to objects on the stack should be avoided, since they could be byte-aligned (e.g., char). -There are also other ways to squeeze information in a pointer, which I will briefly mention. +Also, usually you will have to write a custom allocator (or a wrapper). +For example, this function ensures that allocations have the last 3 bits cleared (8-byte alignment). -A technique called *NaN boxing* makes use specific bit configurations of the IEEE 754 double -to store either a number or a pointer (up to 52 bits). -This is particularly used in languages where double is the dominant number type[^nan]. +```c +void *guarded_alloc(size_t size) { + void *ptr = malloc(size); + assert(((uintptr_t)ptr & 0x7) == 0); + return ptr; +} +``` -There is also *pointer compression*, used by the V8 Javascript engine[^v8]. -Instead of using real addresses, all the allocations are represented by 32-bit offsets in a 4 GiB heap. -By *compressing* the pointer size, the memory footprint is reduced by up to 40%. +Another disadvantage is the lack of interoperability with libraries. +Tagged pointers cannot be used by code that is unaware of them, greatly limiting +the ability to interact with common libraries. +This is less problematic for language interpreters, since they frequently +require wrappers for libraries anyway. -## A word of caution +There is the risk of using a library that has its own tags for pointers, which +could cause all kinds of unintended behaviour. +In general, pointers from other sources are a security risk as they could be tagged incorrectly. + +We also cannot ignore the complexity introduced by pointer tagging, +which makes debugging and tracking errors harder. + +## Real world uses + +Now that the obligatory warning is out of the way, let's see some tagged pointers out in the wild. -This does not work on funky architectures. FPGA, Xeon Phi +LLVM implements *PointerIntPair*, which automatically bitmangles the integer into +the low bits of the pointer. Note that this optimization is applied only when possible and +for small integer values. +A tagged representation is used by both Caml[^caml] and OCaml[^ocaml], despite being compiled +and strongly-typed languages. +This uniform value type allows them to implement generic functions in a simple way. -While there are no standard guarantees about memory alignment, common architectures like x86 are very consistent. +Lisp implementations historically have been one of the first uses of pointer tagging[^taglisp]. +The type tags were even supported at the hardware level in Lisp machines[^lispmach]. +Even modern implementations, such as SBCL, rely on tagged pointers[^sbcl]. +While I'm treating low and high bits tagging as separate techniques, they could be combined. +The Go's runtime does just that with its *taggedPointer*[^go]. +In this way it can store up to 19 bits of metadata in each pointer. +## Alternative techniques + +We will see in detail three techniques: *tagged union*, *low bits tagging*, and *high bits tagging*. +But there are many ways to squeeze information into a pointer, some of which are worth mentioning. + +A technique called **NaN boxing** makes use of specific bit configurations of the IEEE 754 double +to store either a number or a pointer (up to 52 bits). +This is particularly used in languages where double is the dominant number type[^nan]. + +The V8 JavaScript engine uses **pointer compression**[^v8]. +Instead of using real addresses, all the allocations are represented by 32-bit offsets in a 4 GiB heap. +By *compressing* the pointer size, the memory footprint is reduced by up to 40%. + ## Tagged union implementation -Let's start ```c typedef enum { @@ -225,30 +276,35 @@ typedef struct { ``` ```c -// Object value #define VALUE_IS_OBJECT(val) ((val).tag == TAG_OBJECT) #define VALUE_FROM_OBJECT(obj) ((value_t){ .tag = TAG_OBJECT, .to.object = (void *)(obj) }) #define VALUE_TO_OBJECT(val) ((val).to.object) +``` -// Integer value +```c #define INTEGER_MAX INTPTR_MAX #define INTEGER_MIN INTPTR_MIN +``` +```c #define VALUE_IS_INTEGER(val) ((val).tag == TAG_INTEGER) #define VALUE_FROM_INTEGER(num) ((value_t){ .tag = TAG_INTEGER, .to.integer = (num) }) #define VALUE_TO_INTEGER(val) ((val).to.integer) +``` -// Float value +```c #define VALUE_IS_FLOAT(val) ((val).tag == TAG_FLOAT) #define VALUE_FROM_FLOAT(num) ((value_t){ .tag = TAG_FLOAT, .to.float_ = (num) }) #define VALUE_TO_FLOAT(val) ((val).to.float_) +``` -// String value +```c #define VALUE_IS_STRING(val) ((val).tag == TAG_STRING) #define VALUE_FROM_STRING(str) ((value_t){ .tag = TAG_STRING, .to.string = (char *)(str) }) #define VALUE_TO_STRING(val) ((val).to.string) +``` -// Tiny string value +```c #define VALUE_IS_TINYSTR(val) ((val).tag == TAG_TINYSTR) #define VALUE_FROM_TINYSTR(str) ((value_t){ .tag = TAG_TINYSTR, .to.tinystr = (str) }) #define VALUE_TO_TINYSTR(val) ((val).to.tinystr) @@ -277,13 +333,15 @@ typedef uintptr_t value_t; #define VALUE_GET_TAG(val, mask) (value_tag_t)((value_t)(val) & mask) #define VALUE_SET_TAG(val, tag) ((value_t)(val) | tag) #define VALUE_UNSET_TAG(val) ((val) & ~VALUE_TAG_MASK) +``` -// Object value +```c #define VALUE_IS_OBJECT(val) (VALUE_GET_TAG(val, VALUE_TAG_MASK) == TAG_OBJECT) #define VALUE_FROM_OBJECT(obj) VALUE_SET_TAG(obj, TAG_OBJECT) #define VALUE_TO_OBJECT(val) (void *)VALUE_UNSET_TAG(val) +``` -// Integer value +```c #define INTEGER_SHIFT 1 #define INTEGER_MASK 1 #define INTEGER_SIGN_BIT ((value_t)1 << (VALUE_BITS - 1)) @@ -304,8 +362,9 @@ intptr_t value_untag_integer(value_t val) { assert(VALUE_IS_INTEGER(val)); return (intptr_t)val >> INTEGER_SHIFT | (val & INTEGER_SIGN_BIT); } +``` -// Float value +```c #define FLOAT_SHIFT 31 #define VALUE_IS_FLOAT(val) (VALUE_GET_TAG(val, VALUE_TAG_MASK) == TAG_FLOAT) @@ -330,13 +389,15 @@ float value_untag_float(value_t val) { pun.raw = (val >> FLOAT_SHIFT) & 0xffffffff; return pun.num; } +``` -// String value +```c #define VALUE_IS_STRING(val) (VALUE_GET_TAG(val, VALUE_TAG_MASK) == TAG_STRING) #define VALUE_FROM_STRING(str) VALUE_SET_TAG(str, TAG_STRING) #define VALUE_TO_STRING(val) (char *)VALUE_UNSET_TAG(val) +``` -// Tiny string value +```c #define VALUE_IS_TINYSTR(val) (VALUE_GET_TAG(val, VALUE_TAG_MASK) == TAG_TINYSTR) #define VALUE_FROM_TINYSTR(num) value_tag_tinystr(num) #define VALUE_TO_TINYSTR(val) value_untag_tinystr(val) @@ -373,11 +434,11 @@ tinystr_t value_untag_tinystr(value_t val) { ```c // Only the integer tag sets the highest bit typedef enum { - TAG_OBJECT = 0, // 0b000000 - TAG_INTEGER = 0x20, // 0b100000 - TAG_FLOAT = 2, // 0b000010 - TAG_STRING = 3, // 0b000011 - TAG_TINYSTR = 4, // 0b000100 + TAG_OBJECT = 0, // 0b_000000 + TAG_INTEGER = 0x40, // 0b1000000 + TAG_FLOAT = 2, // 0b_000010 + TAG_STRING = 3, // 0b_000011 + TAG_TINYSTR = 4, // 0b_000100 } value_tag_t; ``` @@ -390,16 +451,18 @@ typedef enum { #define VALUE_GET_TAG(val, mask) (value_tag_t)(((value_t)(val) & mask) >> VALUE_TAG_SHIFT) #define VALUE_SET_TAG(val, tag) ((value_t)(val) | (value_t)tag << VALUE_TAG_SHIFT) #define VALUE_UNSET_TAG(val) ((val) & ~VALUE_TAG_MASK) +``` -// Object value +```c #define VALUE_IS_OBJECT(val) (VALUE_GET_TAG(val, VALUE_TAG_MASK) == TAG_OBJECT) #define VALUE_FROM_OBJECT(obj) VALUE_SET_TAG(obj, TAG_OBJECT) #define VALUE_TO_OBJECT(val) (void *)VALUE_UNSET_TAG(val) +``` -// Integer value -#define INTEGER_SHIFT (VALUE_BITS - 1 - 1) +```c +#define INTEGER_SHIFT (VALUE_BITS - 1) #define INTEGER_MASK ((value_t)1 << INTEGER_SHIFT) -#define INTEGER_HIGH_MASK ((value_t)7 << (INTEGER_SHIFT - 1)) +#define INTEGER_HIGH_MASK ((value_t)3 << (INTEGER_SHIFT - 1)) #define INTEGER_SIGN_BIT ((value_t)1 << (INTEGER_SHIFT - 1)) #define INTEGER_MAX (INTEGER_SIGN_BIT - 1) @@ -425,8 +488,9 @@ intptr_t value_untag_integer(value_t val) { if (val & INTEGER_SIGN_BIT) num |= INTEGER_HIGH_MASK; return num; } +``` -// Float value +```c #define VALUE_IS_FLOAT(val) (VALUE_GET_TAG(val, VALUE_TAG_MASK) == TAG_FLOAT) #define VALUE_FROM_FLOAT(num) value_tag_float(num) #define VALUE_TO_FLOAT(val) value_untag_float(val) @@ -449,13 +513,15 @@ float value_untag_float(value_t val) { pun.raw = val & 0xffffffff; return pun.num; } +``` -// String value +```c #define VALUE_IS_STRING(val) (VALUE_GET_TAG(val, VALUE_TAG_MASK) == TAG_STRING) #define VALUE_FROM_STRING(str) VALUE_SET_TAG(str, TAG_STRING) #define VALUE_TO_STRING(val) (char *)VALUE_UNSET_TAG(val) +``` -// Tiny string value +```c #define VALUE_IS_TINYSTR(val) (VALUE_GET_TAG(val, VALUE_TAG_MASK) == TAG_TINYSTR) #define VALUE_FROM_TINYSTR(num) value_tag_tinystr(num) #define VALUE_TO_TINYSTR(val) value_untag_tinystr(val) @@ -488,6 +554,14 @@ tinystr_t value_untag_tinystr(value_t val) { ``` +## Conclusion + +I hope you liked this article. + +Heed my warning about use. + + + ## References - https://muxup.com/2023q4/storing-data-in-pointers @@ -495,8 +569,13 @@ tinystr_t value_untag_tinystr(value_t val) { - https://bernsteinbear.com/blog/small-objects - https://en.wikichip.org/wiki/arm/tbi - https://mikeash.com/pyblog/friday-qa-2012-07-27-lets-build-tagged-pointers.html -- https://www.snellman.net/blog/archive/2017-09-04-lisp-numbers - https://alwaysprocessing.blog/2023/03/19/objc-tagged-ptr +- https://piotrduperas.com/posts/nan-boxing +- https://www.linaro.org/blog/type-tracking-using-arm-memory-tagging +- https://simonsafar.com/2020/sbcl + +[^taglisp]: https://www.snellman.net/blog/archive/2017-09-04-lisp-numbers +[^lispmach]: https://en.wikipedia.org/wiki/Lisp_machine [^nan]: https://wingolog.org/archives/2011/05/18/value-representation-in-javascript-implementations @@ -551,8 +630,17 @@ tinystr_t value_untag_tinystr(value_t val) { [^sbcl]: https://web.archive.org/web/20121014233601/http://sbcl-internals.cliki.net/tag%20bit [^caml]: https://web.archive.org/web/20090810001400/https://caml.inria.fr/pub/ml-archives/caml-list/2004/07/e86a25aa6c6a6a7d08dd7eb50cfd5d52.fr.html +[^ocaml]: https://dev.realworldocaml.org/runtime-memory-layout.html [^tagarch]: https://en.wikipedia.org/wiki/Tagged_architecture [^typed]: Even strongly typed languages like OCaml make use of tagged pointers, \ - https://blog.janestreet.com/what-is-gained-and-lost-with-63-bit-integers/ + https://blog.janestreet.com/what-is-gained-and-lost-with-63-bit-integers + +[^go]: https://github.com/golang/go/blob/master/src/runtime/tagptr_64bit.go + +[^addressing]: They might still be used in certain circumstances. For example, Linux *usually* marks kernel addresses with a 1 in the MSB. + +[^llvm]: https://github.com/llvm/llvm-project/blob/3bd3e06f3fe418e24af65457877f40cee0544f9d/llvm/include/llvm/ADT/PointerIntPair.h#L64 + +[^linux5page]: https://lwn.net/Articles/717293 diff --git a/content/posts/pointer-tagging/test.c b/content/posts/pointer-tagging/test.c index 5b3580c..593461a 100644 --- a/content/posts/pointer-tagging/test.c +++ b/content/posts/pointer-tagging/test.c @@ -44,6 +44,7 @@ int main() { print_binary(v); puts(""); + assert(VALUE_IS_INTEGER(v)); assert(ls[i] == VALUE_TO_INTEGER(v)); } } @@ -60,6 +61,7 @@ int main() { print_binary(v); puts(""); + assert(VALUE_IS_FLOAT(v)); assert(ls[i] == VALUE_TO_FLOAT(v)); } } diff --git a/themes/chilldark b/themes/chilldark index 2c436fb..417b5b3 160000 --- a/themes/chilldark +++ b/themes/chilldark @@ -1 +1 @@ -Subproject commit 2c436fb09fc62a235bc2cbf8f7d18c0fd50cb094 +Subproject commit 417b5b3ab4ba63e78c43dca6d21da47a8b8409ef -- cgit v1.2.3