STLとの共存のための妥協案

D3DXMATRIXA16でも、同様の手法で SIMD を使っているようです。
//  First, declare the struct WITHOUT any alignment specifier.
//  Its natural alignment is that of float, so every conversion to or from
//  __m128 below goes through the unaligned load/store intrinsics and is
//  valid wherever the object happens to live.
struct none_aligned_vec4 {
    //  Three views onto the same four floats: indexed, x/y/z/w and r/g/b/a.
    union {
        float       array[ 4 ];
        struct {
            float   x, y, z, w;
        };
        struct {
            float   r, g, b, a;
        };
    };

    //  Builds the vector from its four components, stored in x, y, z, w order.
    none_aligned_vec4( float x, float y, float z, float w ) {
        array[ 0 ] = x;
        array[ 1 ] = y;
        array[ 2 ] = z;
        array[ 3 ] = w;
    }

    //  Implicit conversion from an SSE register; lane 0 becomes array[ 0 ].
    //  An unaligned store is used because *this is not guaranteed to sit on
    //  a 16-byte boundary.
    none_aligned_vec4( __m128 a ) {
        _mm_storeu_ps( array, a );
    }

    //  Overwrites all four components from an SSE register.
    //  Returns a reference to *this so that assignments can be chained.
    none_aligned_vec4& operator = ( __m128 a ) {
        _mm_storeu_ps( array, a );
        return *this;
    }

    //  Implicit conversion to an SSE register (unaligned load, so it is safe
    //  for any instance, and usable on const objects).
    operator __m128() const {
        return _mm_loadu_ps( array );
    }

    //  Component-wise addition, implemented as a first SIMD operator.
    //  Both operands are loaded into registers before the result is stored,
    //  so `v += v` is well defined. Returns a reference to *this.
    inline none_aligned_vec4& operator += ( const none_aligned_vec4& v ) {
        *this = _mm_add_ps( *this, v );
        return *this;
    }
};

//  The primary type: the same struct, typedef'd with the MSVC-specific
//  intrin_type and align(16) attributes attached. Use this name for
//  values that take part in SIMD arithmetic.
typedef __declspec(intrin_type) __declspec(align(16)) none_aligned_vec4 vec4;

int _tmain(int argc, _TCHAR* argv[])
{
    vec4 a( 0.1f, 0.2f, 0.3f, 0.4f ), b( 0.5f, 0.6f, 0.7f, 0.8f );
    a += b;
    
    //  STL 等、VC においてアライン指定が問題となるコンポーネントとの
    //  運用は、none_aligned_vec4 を使う。
    std::vector< none_aligned_vec4 >    v;
    v.push_back( a ); // もともと同じ型なので、何の問題もなく使える
    ...