// To compile, on GCC: g++ thread_local.cpp -std=c++0x -lpthread

#include <pthread.h>

#include <cstring>
#include <iostream>
#include <system_error>

// Reinterprets the object representation of `source` as a `Dest`.
//
// The bytes of `source` are copied into a value-initialized `Dest`:
//   - if Source is narrower than Dest, the remaining bytes of the result
//     are zero instead of indeterminate;
//   - if Source is wider than Dest, only the leading sizeof(Dest) bytes
//     are kept.
//
// std::memcpy is used rather than writing one union member and reading
// another, because reading the inactive member of a union is undefined
// behaviour in C++.
//
// Both types are expected to be trivially copyable, and Dest must be
// default-constructible.
template <class Dest, class Source>
inline Dest union_cast(Source source) {
    // Value-initialization zeroes every byte that memcpy will not overwrite.
    Dest dest = Dest();

    const std::size_t count =
            sizeof(Source) < sizeof(Dest) ? sizeof(Source) : sizeof(Dest);
    std::memcpy(&dest, &source, count);

    return dest;
}

template <typename T>
class ThreadLocal {
public:
    static_assert(sizeof(T) <= sizeof(void*),
            "ThreadLocal: sizeof(T) > sizeof(void*)");

    ThreadLocal() {
        pthread_key_create(&key, NULL);
    }

    ThreadLocal(const T value) {
        pthread_key_create(&key, NULL);
        set(value);
    }

    ~ThreadLocal() {
        pthread_key_delete(key);
    }

    void set(const T value) {
        pthread_setspecific(key, union_cast<void*>(value));
    }

    T get() const {
        return union_cast<T>(pthread_getspecific(key));
    }

private:
    // Disallow copy and assignment operators.
    ThreadLocal(const ThreadLocal<T>&);
    void operator=(const ThreadLocal<T>&);

    pthread_key_t key;
};

// For values that do not fit in a pointer (sizeof(T) > sizeof(void*)),
// we can simply instantiate ThreadLocal with a pointer as its
// type (e.g. ThreadLocal<int*>). The ThreadLocalPointer class
// makes this explicit.
// Per-thread slot holding a T*; a thin convenience wrapper around
// ThreadLocal<T*> that inherits set() and get() unchanged.
//
// Only the pointer is stored. The pointee is not owned by this class:
// the caller must keep it alive for as long as the pointer may be read.
template <typename T>
class ThreadLocalPointer : public ThreadLocal<T*> {
public:
    ThreadLocalPointer() : ThreadLocal<T*>() {}

    // Disallow copy and assignment operators. Deleted functions give a
    // compile-time diagnostic at the point of use instead of a link error.
    ThreadLocalPointer(const ThreadLocalPointer<T>&) = delete;
    void operator=(const ThreadLocalPointer<T>&) = delete;
};

int main() {

    using namespace std;

    // In order to test the static_assert, on a 32 bits machine.
    // ThreadLocalValue<long long> long_long;

    ThreadLocal<bool> boolean;
    ThreadLocal<int> integer;

    boolean.set(true);
    cout << boolean.get() << endl;

    integer.set(1234);
    cout << integer.get() << endl;

    unsigned int a = 4321;
    ThreadLocalPointer<unsigned int> integer_pointer;
    integer_pointer.set(&a);
    cout << *integer_pointer.get() << endl;

    return 0;
}