146 lines
4.5 KiB

  1. ///////////////////////////////////////////////////////////////////////////////////
  2. /// OpenGL Mathematics (glm.g-truc.net)
  3. ///
  4. /// Copyright (c) 2005 - 2013 G-Truc Creation (www.g-truc.net)
  5. /// Permission is hereby granted, free of charge, to any person obtaining a copy
  6. /// of this software and associated documentation files (the "Software"), to deal
  7. /// in the Software without restriction, including without limitation the rights
  8. /// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9. /// copies of the Software, and to permit persons to whom the Software is
  10. /// furnished to do so, subject to the following conditions:
  11. ///
  12. /// The above copyright notice and this permission notice shall be included in
  13. /// all copies or substantial portions of the Software.
  14. ///
  15. /// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16. /// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17. /// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  18. /// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19. /// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20. /// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  21. /// THE SOFTWARE.
  22. ///
  23. /// @ref core
  24. /// @file glm/core/intrinsic_geometric.inl
  25. /// @date 2009-05-08 / 2011-06-15
  26. /// @author Christophe Riccio
  27. ///////////////////////////////////////////////////////////////////////////////////
  28. namespace glm{
  29. namespace detail{
  30. //length
  31. GLM_FUNC_QUALIFIER __m128 sse_len_ps(__m128 x)
  32. {
  33. __m128 dot0 = sse_dot_ps(x, x);
  34. __m128 sqt0 = _mm_sqrt_ps(dot0);
  35. return sqt0;
  36. }
  37. //distance
  38. GLM_FUNC_QUALIFIER __m128 sse_dst_ps(__m128 p0, __m128 p1)
  39. {
  40. __m128 sub0 = _mm_sub_ps(p0, p1);
  41. __m128 len0 = sse_len_ps(sub0);
  42. return len0;
  43. }
  44. //dot
  45. GLM_FUNC_QUALIFIER __m128 sse_dot_ps(__m128 v1, __m128 v2)
  46. {
  47. # if((GLM_ARCH & GLM_ARCH_SSE4) == GLM_ARCH_SSE4)
  48. return _mm_dp_ps(v1, v2, 0xff);
  49. # else
  50. __m128 mul0 = _mm_mul_ps(v1, v2);
  51. __m128 swp0 = _mm_shuffle_ps(mul0, mul0, _MM_SHUFFLE(2, 3, 0, 1));
  52. __m128 add0 = _mm_add_ps(mul0, swp0);
  53. __m128 swp1 = _mm_shuffle_ps(add0, add0, _MM_SHUFFLE(0, 1, 2, 3));
  54. __m128 add1 = _mm_add_ps(add0, swp1);
  55. return add1;
  56. # endif
  57. }
  58. // SSE1
  59. GLM_FUNC_QUALIFIER __m128 sse_dot_ss(__m128 v1, __m128 v2)
  60. {
  61. __m128 mul0 = _mm_mul_ps(v1, v2);
  62. __m128 mov0 = _mm_movehl_ps(mul0, mul0);
  63. __m128 add0 = _mm_add_ps(mov0, mul0);
  64. __m128 swp1 = _mm_shuffle_ps(add0, add0, 1);
  65. __m128 add1 = _mm_add_ss(add0, swp1);
  66. return add1;
  67. }
  68. //cross
  69. GLM_FUNC_QUALIFIER __m128 sse_xpd_ps(__m128 v1, __m128 v2)
  70. {
  71. __m128 swp0 = _mm_shuffle_ps(v1, v1, _MM_SHUFFLE(3, 0, 2, 1));
  72. __m128 swp1 = _mm_shuffle_ps(v1, v1, _MM_SHUFFLE(3, 1, 0, 2));
  73. __m128 swp2 = _mm_shuffle_ps(v2, v2, _MM_SHUFFLE(3, 0, 2, 1));
  74. __m128 swp3 = _mm_shuffle_ps(v2, v2, _MM_SHUFFLE(3, 1, 0, 2));
  75. __m128 mul0 = _mm_mul_ps(swp0, swp3);
  76. __m128 mul1 = _mm_mul_ps(swp1, swp2);
  77. __m128 sub0 = _mm_sub_ps(mul0, mul1);
  78. return sub0;
  79. }
  80. //normalize
  81. GLM_FUNC_QUALIFIER __m128 sse_nrm_ps(__m128 v)
  82. {
  83. __m128 dot0 = sse_dot_ps(v, v);
  84. __m128 isr0 = _mm_rsqrt_ps(dot0);
  85. __m128 mul0 = _mm_mul_ps(v, isr0);
  86. return mul0;
  87. }
  88. //faceforward
  89. GLM_FUNC_QUALIFIER __m128 sse_ffd_ps(__m128 N, __m128 I, __m128 Nref)
  90. {
  91. //__m128 dot0 = _mm_dot_ps(v, v);
  92. //__m128 neg0 = _mm_neg_ps(N);
  93. //__m128 sgn0 = _mm_sgn_ps(dot0);
  94. //__m128 mix0 = _mm_mix_ps(N, neg0, sgn0);
  95. //return mix0;
  96. __m128 dot0 = sse_dot_ps(Nref, I);
  97. __m128 sgn0 = sse_sgn_ps(dot0);
  98. __m128 mul0 = _mm_mul_ps(sgn0, glm::detail::minus_one);
  99. __m128 mul1 = _mm_mul_ps(N, mul0);
  100. return mul1;
  101. }
  102. //reflect
  103. GLM_FUNC_QUALIFIER __m128 sse_rfe_ps(__m128 I, __m128 N)
  104. {
  105. __m128 dot0 = sse_dot_ps(N, I);
  106. __m128 mul0 = _mm_mul_ps(N, dot0);
  107. __m128 mul1 = _mm_mul_ps(mul0, glm::detail::two);
  108. __m128 sub0 = _mm_sub_ps(I, mul1);
  109. return sub0;
  110. }
  111. //refract
  112. GLM_FUNC_QUALIFIER __m128 sse_rfa_ps(__m128 I, __m128 N, __m128 eta)
  113. {
  114. __m128 dot0 = sse_dot_ps(N, I);
  115. __m128 mul0 = _mm_mul_ps(eta, eta);
  116. __m128 mul1 = _mm_mul_ps(dot0, dot0);
  117. __m128 sub0 = _mm_sub_ps(glm::detail::one, mul0);
  118. __m128 sub1 = _mm_sub_ps(glm::detail::one, mul1);
  119. __m128 mul2 = _mm_mul_ps(sub0, sub1);
  120. if(_mm_movemask_ps(_mm_cmplt_ss(mul2, glm::detail::zero)) == 0)
  121. return glm::detail::zero;
  122. __m128 sqt0 = _mm_sqrt_ps(mul2);
  123. __m128 mul3 = _mm_mul_ps(eta, dot0);
  124. __m128 add0 = _mm_add_ps(mul3, sqt0);
  125. __m128 mul4 = _mm_mul_ps(add0, N);
  126. __m128 mul5 = _mm_mul_ps(eta, I);
  127. __m128 sub2 = _mm_sub_ps(mul5, mul4);
  128. return sub2;
  129. }
  130. }//namespace detail
  131. }//namespace glm