summaryrefslogtreecommitdiff
path: root/content
diff options
context:
space:
mode:
Diffstat (limited to 'content')
-rw-r--r--content/posts/pointer-tagging/highbits.c125
-rw-r--r--content/posts/pointer-tagging/index.md20
-rw-r--r--content/posts/pointer-tagging/lowbits.c118
-rw-r--r--content/posts/pointer-tagging/test.c101
-rw-r--r--content/posts/pointer-tagging/union.c56
5 files changed, 409 insertions, 11 deletions
diff --git a/content/posts/pointer-tagging/highbits.c b/content/posts/pointer-tagging/highbits.c
new file mode 100644
index 0000000..791ff51
--- /dev/null
+++ b/content/posts/pointer-tagging/highbits.c
@@ -0,0 +1,125 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <string.h>
+#include <assert.h>
+
+// The last byte must be zero
+typedef struct {
+ char data[8];
+} tinystr_t;
+
+// Only the integer tag sets the highest bit
+typedef enum {
+ TAG_OBJECT = 0, // 0b000000
+ TAG_INTEGER = 0x20, // 0b100000
+ TAG_FLOAT = 2, // 0b000010
+ TAG_STRING = 3, // 0b000011
+ TAG_TINYSTR = 4, // 0b000100
+} value_tag_t;
+
+typedef uintptr_t value_t;
+
+#define VALUE_BITS 64
+#define VALUE_TAG_BITS 6
+#define VALUE_TAG_SHIFT (VALUE_BITS - 1 - VALUE_TAG_BITS)
+#define VALUE_TAG_MASK ((value_t)0x3f << VALUE_TAG_SHIFT)
+
+#define VALUE_GET_TAG(val, mask) (value_tag_t)(((value_t)(val) & mask) >> VALUE_TAG_SHIFT)
+#define VALUE_SET_TAG(val, tag) ((value_t)(val) | (value_t)tag << VALUE_TAG_SHIFT)
+#define VALUE_UNSET_TAG(val) ((val) & ~VALUE_TAG_MASK)
+
+// Object value
+#define VALUE_IS_OBJECT(val) (VALUE_GET_TAG(val, VALUE_TAG_MASK) == TAG_OBJECT)
+#define VALUE_FROM_OBJECT(obj) VALUE_SET_TAG(obj, TAG_OBJECT)
+#define VALUE_TO_OBJECT(val) (void *)VALUE_UNSET_TAG(val)
+
+// Integer value
+#define INTEGER_SHIFT (VALUE_BITS - 1 - 1)
+#define INTEGER_MASK ((value_t)1 << INTEGER_SHIFT)
+#define INTEGER_HIGH_MASK ((value_t)7 << (INTEGER_SHIFT - 1))
+#define INTEGER_SIGN_BIT ((value_t)1 << (INTEGER_SHIFT - 1))
+
+#define INTEGER_MAX (INTEGER_SIGN_BIT - 1)
+#define INTEGER_MIN (-INTEGER_SIGN_BIT)
+
+#define VALUE_IS_INTEGER(val) (VALUE_GET_TAG(val, INTEGER_MASK) == TAG_INTEGER)
+#define VALUE_FROM_INTEGER(num) value_tag_integer(num)
+#define VALUE_TO_INTEGER(val) value_untag_integer(val)
+
+value_t value_tag_integer(intptr_t num) {
+ assert(num < INTEGER_MIN || num > INTEGER_MAX);
+ // Clear the top bits
+ value_t val = num & ~INTEGER_HIGH_MASK;
+ // Move the sign bit
+ if (num < 0) val |= INTEGER_SIGN_BIT;
+ return VALUE_SET_TAG(val, TAG_INTEGER);
+}
+
+intptr_t value_untag_integer(value_t val) {
+ assert(VALUE_IS_INTEGER(val));
+ intptr_t num = val & ~INTEGER_HIGH_MASK;
+ // If the number is negative, pad with 1's to adjust the two's complement
+ if (val & INTEGER_SIGN_BIT) num |= INTEGER_HIGH_MASK;
+ return num;
+}
+
+// Float value
+#define VALUE_IS_FLOAT(val) (VALUE_GET_TAG(val, VALUE_TAG_MASK) == TAG_FLOAT)
+#define VALUE_FROM_FLOAT(num) value_tag_float(num)
+#define VALUE_TO_FLOAT(val) value_untag_float(val)
+
+value_t value_tag_float(float num) {
+ union {
+ uint32_t raw;
+ float num;
+ } pun;
+ pun.num = num;
+ return VALUE_SET_TAG(pun.raw, TAG_FLOAT);
+}
+
+float value_untag_float(value_t val) {
+ assert(VALUE_IS_FLOAT(val));
+ union {
+ uint32_t raw;
+ float num;
+ } pun;
+ pun.raw = val & 0xffffffff;
+ return pun.num;
+}
+
+// String value
+#define VALUE_IS_STRING(val) (VALUE_GET_TAG(val, VALUE_TAG_MASK) == TAG_STRING)
+#define VALUE_FROM_STRING(str) VALUE_SET_TAG(str, TAG_STRING)
+#define VALUE_TO_STRING(val) (char *)VALUE_UNSET_TAG(val)
+
+// Tiny string value
+
+#define VALUE_IS_TINYSTR(val) (VALUE_GET_TAG(val, VALUE_TAG_MASK) == TAG_TINYSTR)
+#define VALUE_FROM_TINYSTR(num) value_tag_tinystr(num)
+#define VALUE_TO_TINYSTR(val) value_untag_tinystr(val)
+
+value_t value_tag_tinystr(tinystr_t str) {
+ assert(str.data[7] == '\0');
+ return ((value_t)str.data[0] << 0)
+ | ((value_t)str.data[1] << 8)
+ | ((value_t)str.data[2] << 16)
+ | ((value_t)str.data[3] << 24)
+ | ((value_t)str.data[4] << 32)
+ | ((value_t)str.data[5] << 40)
+ | ((value_t)str.data[6] << 48)
+ | (value_t)TAG_TINYSTR << VALUE_TAG_SHIFT;
+}
+
+tinystr_t value_untag_tinystr(value_t val) {
+ assert(VALUE_IS_TINYSTR(val));
+ tinystr_t str = {
+ (val >> 0) & 0xff,
+ (val >> 8) & 0xff,
+ (val >> 16) & 0xff,
+ (val >> 24) & 0xff,
+ (val >> 32) & 0xff,
+ (val >> 40) & 0xff,
+ (val >> 48) & 0xff,
+ };
+ return str;
+}
diff --git a/content/posts/pointer-tagging/index.md b/content/posts/pointer-tagging/index.md
index 7f133e8..456c959 100644
--- a/content/posts/pointer-tagging/index.md
+++ b/content/posts/pointer-tagging/index.md
@@ -123,27 +123,25 @@ Top Bottom
## Why is this useful?
-Now that we have understood the principles behind pointer tagging,
-we need to address :wink: their advantages.
+Now that we have a good grasp of the principles of pointer tagging,
+we can move on to address :wink: their practical uses and advantages.
-The stored metadata could be used for many things, including memory safety[^mte2].
-However, I will focus on applications for language development and interpreters.
+The metadata stored in the tags could be utilized in many ways.
+For example, Android uses it to improve memory safety through MTE[^mte2].
+From now on, I will focus on its applications in language development and interpreters.
-Before delving into the benefits of tagged pointers, let's see what they are replacing.
+But before delving into the benefits of this optimization, let's first see what they are replacing.
A common approach is to heap allocate everything as an object and use
pointers as a uniform value representation. This is what CPython does with `PyObject`.
The usual alternative is a *tagged union*, which stores a union of the possible value types alongside
-an integer flag. This means less heap allocations with the cost of a bigger value type (usually 16 bytes).
+an integer flag. This results in fewer heap allocations, but comes at the cost of a larger value type (usually 16 bytes).
Lua is currently using this approach[^lua].
-Is there a way to have our cake and eat it too?
-By tagging our pointers, we can decrease heap allocations and have a
-single word as our value representation.
+By tagging our pointers, we can reduce heap allocations and have a single word as our value representation.
+We can have our cake and eat it too!
-while retaining pointers as our value representation by tagging them.
-
diff --git a/content/posts/pointer-tagging/lowbits.c b/content/posts/pointer-tagging/lowbits.c
new file mode 100644
index 0000000..7b3c8c8
--- /dev/null
+++ b/content/posts/pointer-tagging/lowbits.c
@@ -0,0 +1,118 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <string.h>
+#include <assert.h>
+
+// The last byte must be zero
+typedef struct {
+ char data[8];
+} tinystr_t;
+
+// Only the integer tag sets the lowest bit
+typedef enum {
+ TAG_OBJECT = 0, // 0b000
+ TAG_INTEGER = 1, // 0b001
+ TAG_FLOAT = 2, // 0b010
+ TAG_STRING = 4, // 0b100
+ TAG_TINYSTR = 6, // 0b110
+} value_tag_t;
+
+typedef uintptr_t value_t;
+
+#define VALUE_BITS 64
+#define VALUE_TAG_BITS 3
+#define VALUE_TAG_MASK 7 // 0b111
+
+#define VALUE_GET_TAG(val, mask) (value_tag_t)((value_t)(val) & mask)
+#define VALUE_SET_TAG(val, tag) ((value_t)(val) | tag)
+#define VALUE_UNSET_TAG(val) ((val) & ~VALUE_TAG_MASK)
+
+// Object value
+#define VALUE_IS_OBJECT(val) (VALUE_GET_TAG(val, VALUE_TAG_MASK) == TAG_OBJECT)
+#define VALUE_FROM_OBJECT(obj) VALUE_SET_TAG(obj, TAG_OBJECT)
+#define VALUE_TO_OBJECT(val) (void *)VALUE_UNSET_TAG(val)
+
+// Integer value
+#define INTEGER_SHIFT 1
+#define INTEGER_MASK 1
+#define INTEGER_SIGN_BIT ((value_t)1 << (VALUE_BITS - 1))
+
+#define INTEGER_MAX (((value_t)1 << (VALUE_BITS - 1 - INTEGER_SHIFT)) - 1)
+#define INTEGER_MIN -((value_t)1 << (VALUE_BITS - 1 - INTEGER_SHIFT))
+
+#define VALUE_IS_INTEGER(val) (VALUE_GET_TAG(val, INTEGER_MASK) == TAG_INTEGER)
+#define VALUE_FROM_INTEGER(num) value_tag_integer(num)
+#define VALUE_TO_INTEGER(val) value_untag_integer(val)
+
+value_t value_tag_integer(intptr_t num) {
+ assert(num < INTEGER_MIN || num > INTEGER_MAX);
+ return (value_t)num << INTEGER_SHIFT | (num & INTEGER_SIGN_BIT) | TAG_INTEGER;
+}
+
+intptr_t value_untag_integer(value_t val) {
+ assert(VALUE_IS_INTEGER(val));
+ return (intptr_t)val >> INTEGER_SHIFT | (val & INTEGER_SIGN_BIT);
+}
+
+// Float value
+#define FLOAT_SHIFT 31
+
+#define VALUE_IS_FLOAT(val) (VALUE_GET_TAG(val, VALUE_TAG_MASK) == TAG_FLOAT)
+#define VALUE_FROM_FLOAT(num) value_tag_float(num)
+#define VALUE_TO_FLOAT(val) value_untag_float(val)
+
+value_t value_tag_float(float num) {
+ union {
+ uint32_t raw;
+ float num;
+ } pun;
+ pun.num = num;
+ return VALUE_SET_TAG((value_t)pun.raw << FLOAT_SHIFT, TAG_FLOAT);
+}
+
+float value_untag_float(value_t val) {
+ assert(VALUE_IS_FLOAT(val));
+ union {
+ uint32_t raw;
+ float num;
+ } pun;
+ pun.raw = (val >> FLOAT_SHIFT) & 0xffffffff;
+ return pun.num;
+}
+
+// String value
+#define VALUE_IS_STRING(val) (VALUE_GET_TAG(val, VALUE_TAG_MASK) == TAG_STRING)
+#define VALUE_FROM_STRING(str) VALUE_SET_TAG(str, TAG_STRING)
+#define VALUE_TO_STRING(val) (char *)VALUE_UNSET_TAG(val)
+
+// Tiny string value
+
+#define VALUE_IS_TINYSTR(val) (VALUE_GET_TAG(val, VALUE_TAG_MASK) == TAG_TINYSTR)
+#define VALUE_FROM_TINYSTR(num) value_tag_tinystr(num)
+#define VALUE_TO_TINYSTR(val) value_untag_tinystr(val)
+
+value_t value_tag_tinystr(tinystr_t str) {
+ assert(str.data[7] == '\0');
+ return ((value_t)str.data[0] << 8)
+ | ((value_t)str.data[1] << 16)
+ | ((value_t)str.data[2] << 24)
+ | ((value_t)str.data[3] << 32)
+ | ((value_t)str.data[4] << 40)
+ | ((value_t)str.data[5] << 48)
+ | ((value_t)str.data[6] << 56)
+ | TAG_TINYSTR;
+}
+
+tinystr_t value_untag_tinystr(value_t val) {
+ assert(VALUE_IS_TINYSTR(val));
+ tinystr_t str = {
+ (val >> 8) & 0xff,
+ (val >> 16) & 0xff,
+ (val >> 24) & 0xff,
+ (val >> 32) & 0xff,
+ (val >> 40) & 0xff,
+ (val >> 48) & 0xff,
+ (val >> 56) & 0xff,
+ };
+ return str;
+}
diff --git a/content/posts/pointer-tagging/test.c b/content/posts/pointer-tagging/test.c
new file mode 100644
index 0000000..88d259b
--- /dev/null
+++ b/content/posts/pointer-tagging/test.c
@@ -0,0 +1,101 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "lowbits.c"
+//#include "highbits.c"
+//#include "union.c"
+
+void print_binary(value_t v) {
+ const size_t n = sizeof(value_t) / sizeof(uintptr_t);
+ uintptr_t ls[n];
+ memcpy(ls, &v, sizeof(value_t));
+
+ for (int i = 0; i < n; i++) {
+ for (int j = 8 * sizeof(uintptr_t) - 1; j >= 0; j--) {
+ putchar((ls[i] >> j) & 1 ? '1' : '0');
+ }
+ puts("");
+ }
+}
+
+int main() {
+
+ // Object
+ {
+ void *obj = malloc(123);
+ value_t v = VALUE_FROM_OBJECT(obj);
+ printf("Object %p\nTagged: ", obj);
+ print_binary(v);
+ puts("");
+
+ assert(VALUE_IS_OBJECT(v));
+ assert(VALUE_TO_OBJECT(v) == obj);
+ }
+
+ // Integer
+ {
+ intptr_t ls[] = {
+ -1, 10, 1000000, INTEGER_MAX, INTEGER_MIN, 424242
+ };
+
+ for (int i = 0; i < sizeof(ls)/sizeof(*ls); i++) {
+ value_t v = VALUE_FROM_INTEGER(ls[i]);
+ printf("Integer %ld\nTagged: ", ls[i]);
+ print_binary(v);
+ puts("");
+
+ assert(ls[i] == VALUE_TO_INTEGER(v));
+ }
+ }
+
+ // Float
+ {
+ float ls[] = {
+ -1.0, 88.88, 10e-16, 199.9876e12, -1111111111
+ };
+
+ for (int i = 0; i < sizeof(ls)/sizeof(*ls); i++) {
+ value_t v = VALUE_FROM_FLOAT(ls[i]);
+ printf("Float %f\nTagged: ", ls[i]);
+ print_binary(v);
+ puts("");
+
+ assert(ls[i] == VALUE_TO_FLOAT(v));
+ }
+ }
+
+ // String
+ {
+ char *str = malloc(20);
+ strcpy(str, "Hello, World");
+
+ value_t v = VALUE_FROM_STRING(str);
+ printf("String %s\nTagged: ", str);
+ print_binary(v);
+ puts("");
+
+ assert(VALUE_IS_STRING(v));
+ assert(VALUE_TO_STRING(v) == str);
+ }
+
+ // Tiny string
+ {
+ tinystr_t s1 = { "Short" };
+ value_t v1 = VALUE_FROM_TINYSTR(s1);
+ printf("Tiny string %s\nTagged: ", s1.data);
+ print_binary(v1);
+ puts("");
+
+ assert(!memcmp(s1.data, VALUE_TO_TINYSTR(v1).data, 8));
+
+ tinystr_t s2 = { "1234567" };
+ value_t v2 = VALUE_FROM_TINYSTR(s2);
+ printf("Tiny string %s\nTagged: ", s2.data);
+ print_binary(v2);
+ puts("");
+
+ assert(!memcmp(s2.data, VALUE_TO_TINYSTR(v2).data, 8));
+ }
+
+ return 0;
+}
diff --git a/content/posts/pointer-tagging/union.c b/content/posts/pointer-tagging/union.c
new file mode 100644
index 0000000..6cba12f
--- /dev/null
+++ b/content/posts/pointer-tagging/union.c
@@ -0,0 +1,56 @@
+#include <stdint.h>
+#include <stdbool.h>
+#include <string.h>
+#include <assert.h>
+
+// The last byte must be zero
+typedef struct {
+ char data[8];
+} tinystr_t;
+
+typedef enum {
+ TAG_OBJECT,
+ TAG_INTEGER,
+ TAG_FLOAT,
+ TAG_STRING,
+ TAG_TINYSTR,
+} value_tag_t;
+
+typedef struct {
+ value_tag_t tag;
+ union {
+ void *object;
+ intptr_t integer;
+ float float_;
+ char *string;
+ tinystr_t tinystr;
+ } to;
+} value_t;
+
+// Object value
+#define VALUE_IS_OBJECT(val) ((val).tag == TAG_OBJECT)
+#define VALUE_FROM_OBJECT(obj) ((value_t){ .tag = TAG_OBJECT, .to.object = (void *)(obj) })
+#define VALUE_TO_OBJECT(val) ((val).to.object)
+
+// Integer value
+#define INTEGER_MAX INTPTR_MAX
+#define INTEGER_MIN INTPTR_MIN
+
+#define VALUE_IS_INTEGER(val) ((val).tag == TAG_INTEGER)
+#define VALUE_FROM_INTEGER(num) ((value_t){ .tag = TAG_INTEGER, .to.integer = (num) })
+#define VALUE_TO_INTEGER(val) ((val).to.integer)
+
+// Float value
+#define VALUE_IS_FLOAT(val) ((val).tag == TAG_FLOAT)
+#define VALUE_FROM_FLOAT(num) ((value_t){ .tag = TAG_FLOAT, .to.float_ = (num) })
+#define VALUE_TO_FLOAT(val) ((val).to.float_)
+
+// String value
+#define VALUE_IS_STRING(val) ((val).tag == TAG_STRING)
+#define VALUE_FROM_STRING(str) ((value_t){ .tag = TAG_STRING, .to.string = (char *)(str) })
+#define VALUE_TO_STRING(val) ((val).to.string)
+
+// Tiny string value
+#define VALUE_IS_TINYSTR(val) ((val).tag == TAG_TINYSTR)
+#define VALUE_FROM_TINYSTR(str) ((value_t){ .tag = TAG_TINYSTR, .to.tinystr = (str) })
+#define VALUE_TO_TINYSTR(val) ((val).to.tinystr)