You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

727 lines
15 KiB

5 years ago
  1. ///////////////////////////////////////////////////////////////////////////////////////////////////
  2. // OpenGL Mathematics Copyright (c) 2005 - 2013 G-Truc Creation (www.g-truc.net)
  3. ///////////////////////////////////////////////////////////////////////////////////////////////////
  4. // Created : 2009-05-07
  5. // Updated : 2009-05-07
  6. // Licence : This source is under MIT License
  7. // File : glm/gtx/simd_vec4.inl
  8. ///////////////////////////////////////////////////////////////////////////////////////////////////
  9. namespace glm{
  10. namespace detail{
  // Wraps a compile-time integer so that it can be read back as the integral
  // constant mask<N>::value (used in this file as the _mm_shuffle_ps selector).
  template <int Value>
  struct mask
  {
      enum
      {
          value = Value
      };
  };
  16. //////////////////////////////////////
  17. // Implicit basic constructors
  18. GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD()
  19. #ifdef GLM_SIMD_ENABLE_DEFAULT_INIT
  20. : Data(_mm_set_ps(0.0f, 0.0f, 0.0f, 0.0f))
  21. #endif
  22. {}
  23. GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(__m128 const & Data) :
  24. Data(Data)
  25. {}
  26. GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(fvec4SIMD const & v) :
  27. Data(v.Data)
  28. {}
  29. GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(tvec4<float> const & v) :
  30. Data(_mm_set_ps(v.w, v.z, v.y, v.x))
  31. {}
  32. //////////////////////////////////////
  33. // Explicit basic constructors
  34. GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(float const & s) :
  35. Data(_mm_set1_ps(s))
  36. {}
  37. GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(float const & x, float const & y, float const & z, float const & w) :
  38. // Data(_mm_setr_ps(x, y, z, w))
  39. Data(_mm_set_ps(w, z, y, x))
  40. {}
  41. /*
  42. GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(float const v[4]) :
  43. Data(_mm_load_ps(v))
  44. {}
  45. */
  46. //////////////////////////////////////
  47. // Swizzle constructors
  48. //fvec4SIMD(ref4<float> const & r);
  49. //////////////////////////////////////
  50. // Conversion vector constructors
  51. GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(vec2 const & v, float const & s1, float const & s2) :
  52. Data(_mm_set_ps(s2, s1, v.y, v.x))
  53. {}
  54. GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(float const & s1, vec2 const & v, float const & s2) :
  55. Data(_mm_set_ps(s2, v.y, v.x, s1))
  56. {}
  57. GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(float const & s1, float const & s2, vec2 const & v) :
  58. Data(_mm_set_ps(v.y, v.x, s2, s1))
  59. {}
  60. GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(vec3 const & v, float const & s) :
  61. Data(_mm_set_ps(s, v.z, v.y, v.x))
  62. {}
  63. GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(float const & s, vec3 const & v) :
  64. Data(_mm_set_ps(v.z, v.y, v.x, s))
  65. {}
  66. GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(vec2 const & v1, vec2 const & v2) :
  67. Data(_mm_set_ps(v2.y, v2.x, v1.y, v1.x))
  68. {}
  69. //GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(ivec4SIMD const & v) :
  70. // Data(_mm_cvtepi32_ps(v.Data))
  71. //{}
  72. //////////////////////////////////////
  73. // Unary arithmetic operators
  74. GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator=(fvec4SIMD const & v)
  75. {
  76. this->Data = v.Data;
  77. return *this;
  78. }
  79. GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator+=(float const & s)
  80. {
  81. this->Data = _mm_add_ps(Data, _mm_set_ps1(s));
  82. return *this;
  83. }
  84. GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator+=(fvec4SIMD const & v)
  85. {
  86. this->Data = _mm_add_ps(this->Data , v.Data);
  87. return *this;
  88. }
  89. GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator-=(float const & s)
  90. {
  91. this->Data = _mm_sub_ps(Data, _mm_set_ps1(s));
  92. return *this;
  93. }
  94. GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator-=(fvec4SIMD const & v)
  95. {
  96. this->Data = _mm_sub_ps(this->Data , v.Data);
  97. return *this;
  98. }
  99. GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator*=(float const & s)
  100. {
  101. this->Data = _mm_mul_ps(this->Data, _mm_set_ps1(s));
  102. return *this;
  103. }
  104. GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator*=(fvec4SIMD const & v)
  105. {
  106. this->Data = _mm_mul_ps(this->Data , v.Data);
  107. return *this;
  108. }
  109. GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator/=(float const & s)
  110. {
  111. this->Data = _mm_div_ps(Data, _mm_set1_ps(s));
  112. return *this;
  113. }
  114. GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator/=(fvec4SIMD const & v)
  115. {
  116. this->Data = _mm_div_ps(this->Data , v.Data);
  117. return *this;
  118. }
  119. GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator++()
  120. {
  121. this->Data = _mm_add_ps(this->Data , glm::detail::one);
  122. return *this;
  123. }
  124. GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator--()
  125. {
  126. this->Data = _mm_sub_ps(this->Data, glm::detail::one);
  127. return *this;
  128. }
  129. //////////////////////////////////////
  130. // Swizzle operators
  131. template <comp X, comp Y, comp Z, comp W>
  132. GLM_FUNC_QUALIFIER fvec4SIMD fvec4SIMD::swizzle() const
  133. {
  134. __m128 Data = _mm_shuffle_ps(
  135. this->Data, this->Data,
  136. mask<(W << 6) | (Z << 4) | (Y << 2) | (X << 0)>::value);
  137. return fvec4SIMD(Data);
  138. }
  139. template <comp X, comp Y, comp Z, comp W>
  140. GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::swizzle()
  141. {
  142. this->Data = _mm_shuffle_ps(
  143. this->Data, this->Data,
  144. mask<(W << 6) | (Z << 4) | (Y << 2) | (X << 0)>::value);
  145. return *this;
  146. }
  147. // operator+
  148. GLM_FUNC_QUALIFIER fvec4SIMD operator+ (fvec4SIMD const & v, float s)
  149. {
  150. return fvec4SIMD(_mm_add_ps(v.Data, _mm_set1_ps(s)));
  151. }
  152. GLM_FUNC_QUALIFIER fvec4SIMD operator+ (float s, fvec4SIMD const & v)
  153. {
  154. return fvec4SIMD(_mm_add_ps(_mm_set1_ps(s), v.Data));
  155. }
  156. GLM_FUNC_QUALIFIER fvec4SIMD operator+ (fvec4SIMD const & v1, fvec4SIMD const & v2)
  157. {
  158. return fvec4SIMD(_mm_add_ps(v1.Data, v2.Data));
  159. }
  160. //operator-
  161. GLM_FUNC_QUALIFIER fvec4SIMD operator- (fvec4SIMD const & v, float s)
  162. {
  163. return fvec4SIMD(_mm_sub_ps(v.Data, _mm_set1_ps(s)));
  164. }
  165. GLM_FUNC_QUALIFIER fvec4SIMD operator- (float s, fvec4SIMD const & v)
  166. {
  167. return fvec4SIMD(_mm_sub_ps(_mm_set1_ps(s), v.Data));
  168. }
  169. GLM_FUNC_QUALIFIER fvec4SIMD operator- (fvec4SIMD const & v1, fvec4SIMD const & v2)
  170. {
  171. return fvec4SIMD(_mm_sub_ps(v1.Data, v2.Data));
  172. }
  173. //operator*
  174. GLM_FUNC_QUALIFIER fvec4SIMD operator* (fvec4SIMD const & v, float s)
  175. {
  176. __m128 par0 = v.Data;
  177. __m128 par1 = _mm_set1_ps(s);
  178. return fvec4SIMD(_mm_mul_ps(par0, par1));
  179. }
  180. GLM_FUNC_QUALIFIER fvec4SIMD operator* (float s, fvec4SIMD const & v)
  181. {
  182. __m128 par0 = _mm_set1_ps(s);
  183. __m128 par1 = v.Data;
  184. return fvec4SIMD(_mm_mul_ps(par0, par1));
  185. }
  186. GLM_FUNC_QUALIFIER fvec4SIMD operator* (fvec4SIMD const & v1, fvec4SIMD const & v2)
  187. {
  188. return fvec4SIMD(_mm_mul_ps(v1.Data, v2.Data));
  189. }
  190. //operator/
  191. GLM_FUNC_QUALIFIER fvec4SIMD operator/ (fvec4SIMD const & v, float s)
  192. {
  193. __m128 par0 = v.Data;
  194. __m128 par1 = _mm_set1_ps(s);
  195. return fvec4SIMD(_mm_div_ps(par0, par1));
  196. }
  197. GLM_FUNC_QUALIFIER fvec4SIMD operator/ (float s, fvec4SIMD const & v)
  198. {
  199. __m128 par0 = _mm_set1_ps(s);
  200. __m128 par1 = v.Data;
  201. return fvec4SIMD(_mm_div_ps(par0, par1));
  202. }
  203. GLM_FUNC_QUALIFIER fvec4SIMD operator/ (fvec4SIMD const & v1, fvec4SIMD const & v2)
  204. {
  205. return fvec4SIMD(_mm_div_ps(v1.Data, v2.Data));
  206. }
  207. // Unary constant operators
  208. GLM_FUNC_QUALIFIER fvec4SIMD operator- (fvec4SIMD const & v)
  209. {
  210. return fvec4SIMD(_mm_sub_ps(_mm_setzero_ps(), v.Data));
  211. }
  212. GLM_FUNC_QUALIFIER fvec4SIMD operator++ (fvec4SIMD const & v, int)
  213. {
  214. return fvec4SIMD(_mm_add_ps(v.Data, glm::detail::one));
  215. }
  216. GLM_FUNC_QUALIFIER fvec4SIMD operator-- (fvec4SIMD const & v, int)
  217. {
  218. return fvec4SIMD(_mm_sub_ps(v.Data, glm::detail::one));
  219. }
  220. }//namespace detail
  221. GLM_FUNC_QUALIFIER detail::tvec4<float> vec4_cast
  222. (
  223. detail::fvec4SIMD const & x
  224. )
  225. {
  226. GLM_ALIGN(16) detail::tvec4<float> Result;
  227. _mm_store_ps(&Result[0], x.Data);
  228. return Result;
  229. }
  230. // Other possible implementation
  231. //float abs(float a)
  232. //{
  233. // return max(-a, a);
  234. //}
  235. GLM_FUNC_QUALIFIER detail::fvec4SIMD abs
  236. (
  237. detail::fvec4SIMD const & x
  238. )
  239. {
  240. return detail::sse_abs_ps(x.Data);
  241. }
  242. GLM_FUNC_QUALIFIER detail::fvec4SIMD sign
  243. (
  244. detail::fvec4SIMD const & x
  245. )
  246. {
  247. return detail::sse_sgn_ps(x.Data);
  248. }
  249. GLM_FUNC_QUALIFIER detail::fvec4SIMD floor
  250. (
  251. detail::fvec4SIMD const & x
  252. )
  253. {
  254. return detail::sse_flr_ps(x.Data);
  255. }
  256. GLM_FUNC_QUALIFIER detail::fvec4SIMD trunc
  257. (
  258. detail::fvec4SIMD const & x
  259. )
  260. {
  261. //return x < 0 ? -floor(-x) : floor(x);
  262. __m128 Flr0 = detail::sse_flr_ps(_mm_sub_ps(_mm_setzero_ps(), x.Data));
  263. __m128 Sub0 = _mm_sub_ps(Flr0, x.Data);
  264. __m128 Flr1 = detail::sse_flr_ps(x.Data);
  265. __m128 Cmp0 = _mm_cmplt_ps(x.Data, glm::detail::zero);
  266. __m128 Cmp1 = _mm_cmpnlt_ps(x.Data, glm::detail::zero);
  267. __m128 And0 = _mm_and_ps(Sub0, Cmp0);
  268. __m128 And1 = _mm_and_ps(Flr1, Cmp1);
  269. return _mm_or_ps(And0, And1);
  270. }
  271. GLM_FUNC_QUALIFIER detail::fvec4SIMD round
  272. (
  273. detail::fvec4SIMD const & x
  274. )
  275. {
  276. return detail::sse_rnd_ps(x.Data);
  277. }
  278. //GLM_FUNC_QUALIFIER detail::fvec4SIMD roundEven
  279. //(
  280. // detail::fvec4SIMD const & x
  281. //)
  282. //{
  283. //}
  284. GLM_FUNC_QUALIFIER detail::fvec4SIMD ceil
  285. (
  286. detail::fvec4SIMD const & x
  287. )
  288. {
  289. return detail::sse_ceil_ps(x.Data);
  290. }
  291. GLM_FUNC_QUALIFIER detail::fvec4SIMD fract
  292. (
  293. detail::fvec4SIMD const & x
  294. )
  295. {
  296. return detail::sse_frc_ps(x.Data);
  297. }
  298. GLM_FUNC_QUALIFIER detail::fvec4SIMD mod
  299. (
  300. detail::fvec4SIMD const & x,
  301. detail::fvec4SIMD const & y
  302. )
  303. {
  304. return detail::sse_mod_ps(x.Data, y.Data);
  305. }
  306. GLM_FUNC_QUALIFIER detail::fvec4SIMD mod
  307. (
  308. detail::fvec4SIMD const & x,
  309. float const & y
  310. )
  311. {
  312. return detail::sse_mod_ps(x.Data, _mm_set1_ps(y));
  313. }
  314. //GLM_FUNC_QUALIFIER detail::fvec4SIMD modf
  315. //(
  316. // detail::fvec4SIMD const & x,
  317. // detail::fvec4SIMD & i
  318. //)
  319. //{
  320. //}
  321. GLM_FUNC_QUALIFIER detail::fvec4SIMD min
  322. (
  323. detail::fvec4SIMD const & x,
  324. detail::fvec4SIMD const & y
  325. )
  326. {
  327. return _mm_min_ps(x.Data, y.Data);
  328. }
  329. GLM_FUNC_QUALIFIER detail::fvec4SIMD min
  330. (
  331. detail::fvec4SIMD const & x,
  332. float const & y
  333. )
  334. {
  335. return _mm_min_ps(x.Data, _mm_set1_ps(y));
  336. }
  337. GLM_FUNC_QUALIFIER detail::fvec4SIMD max
  338. (
  339. detail::fvec4SIMD const & x,
  340. detail::fvec4SIMD const & y
  341. )
  342. {
  343. return _mm_max_ps(x.Data, y.Data);
  344. }
  345. GLM_FUNC_QUALIFIER detail::fvec4SIMD max
  346. (
  347. detail::fvec4SIMD const & x,
  348. float const & y
  349. )
  350. {
  351. return _mm_max_ps(x.Data, _mm_set1_ps(y));
  352. }
  353. GLM_FUNC_QUALIFIER detail::fvec4SIMD clamp
  354. (
  355. detail::fvec4SIMD const & x,
  356. detail::fvec4SIMD const & minVal,
  357. detail::fvec4SIMD const & maxVal
  358. )
  359. {
  360. return detail::sse_clp_ps(x.Data, minVal.Data, maxVal.Data);
  361. }
  362. GLM_FUNC_QUALIFIER detail::fvec4SIMD clamp
  363. (
  364. detail::fvec4SIMD const & x,
  365. float const & minVal,
  366. float const & maxVal
  367. )
  368. {
  369. return detail::sse_clp_ps(x.Data, _mm_set1_ps(minVal), _mm_set1_ps(maxVal));
  370. }
  371. GLM_FUNC_QUALIFIER detail::fvec4SIMD mix
  372. (
  373. detail::fvec4SIMD const & x,
  374. detail::fvec4SIMD const & y,
  375. detail::fvec4SIMD const & a
  376. )
  377. {
  378. __m128 Sub0 = _mm_sub_ps(y.Data, x.Data);
  379. __m128 Mul0 = _mm_mul_ps(a.Data, Sub0);
  380. return _mm_add_ps(x.Data, Mul0);
  381. }
  382. GLM_FUNC_QUALIFIER detail::fvec4SIMD step
  383. (
  384. detail::fvec4SIMD const & edge,
  385. detail::fvec4SIMD const & x
  386. )
  387. {
  388. __m128 cmp0 = _mm_cmpngt_ps(x.Data, edge.Data);
  389. return _mm_max_ps(_mm_min_ps(cmp0, _mm_setzero_ps()), detail::one);
  390. }
  391. GLM_FUNC_QUALIFIER detail::fvec4SIMD step
  392. (
  393. float const & edge,
  394. detail::fvec4SIMD const & x
  395. )
  396. {
  397. __m128 cmp0 = _mm_cmpngt_ps(x.Data, _mm_set1_ps(edge));
  398. return _mm_max_ps(_mm_min_ps(cmp0, _mm_setzero_ps()), detail::one);
  399. }
  400. GLM_FUNC_QUALIFIER detail::fvec4SIMD smoothstep
  401. (
  402. detail::fvec4SIMD const & edge0,
  403. detail::fvec4SIMD const & edge1,
  404. detail::fvec4SIMD const & x
  405. )
  406. {
  407. return detail::sse_ssp_ps(edge0.Data, edge1.Data, x.Data);
  408. }
  409. GLM_FUNC_QUALIFIER detail::fvec4SIMD smoothstep
  410. (
  411. float const & edge0,
  412. float const & edge1,
  413. detail::fvec4SIMD const & x
  414. )
  415. {
  416. return detail::sse_ssp_ps(_mm_set1_ps(edge0), _mm_set1_ps(edge1), x.Data);
  417. }
  418. //GLM_FUNC_QUALIFIER bvec4 isnan(detail::fvec4SIMD const & x)
  419. //{
  420. //}
  421. //GLM_FUNC_QUALIFIER bvec4 isinf(detail::fvec4SIMD const & x)
  422. //{
  423. //}
  424. //GLM_FUNC_QUALIFIER detail::ivec4SIMD floatBitsToInt
  425. //(
  426. // detail::fvec4SIMD const & value
  427. //)
  428. //{
  429. //}
  430. //GLM_FUNC_QUALIFIER detail::fvec4SIMD intBitsToFloat
  431. //(
  432. // detail::ivec4SIMD const & value
  433. //)
  434. //{
  435. //}
  436. GLM_FUNC_QUALIFIER detail::fvec4SIMD fma
  437. (
  438. detail::fvec4SIMD const & a,
  439. detail::fvec4SIMD const & b,
  440. detail::fvec4SIMD const & c
  441. )
  442. {
  443. return _mm_add_ps(_mm_mul_ps(a.Data, b.Data), c.Data);
  444. }
  445. GLM_FUNC_QUALIFIER float length
  446. (
  447. detail::fvec4SIMD const & x
  448. )
  449. {
  450. detail::fvec4SIMD dot0 = detail::sse_dot_ss(x.Data, x.Data);
  451. detail::fvec4SIMD sqt0 = sqrt(dot0);
  452. float Result = 0;
  453. _mm_store_ss(&Result, sqt0.Data);
  454. return Result;
  455. }
  456. GLM_FUNC_QUALIFIER float fastLength
  457. (
  458. detail::fvec4SIMD const & x
  459. )
  460. {
  461. detail::fvec4SIMD dot0 = detail::sse_dot_ss(x.Data, x.Data);
  462. detail::fvec4SIMD sqt0 = fastSqrt(dot0);
  463. float Result = 0;
  464. _mm_store_ss(&Result, sqt0.Data);
  465. return Result;
  466. }
  467. GLM_FUNC_QUALIFIER float niceLength
  468. (
  469. detail::fvec4SIMD const & x
  470. )
  471. {
  472. detail::fvec4SIMD dot0 = detail::sse_dot_ss(x.Data, x.Data);
  473. detail::fvec4SIMD sqt0 = niceSqrt(dot0);
  474. float Result = 0;
  475. _mm_store_ss(&Result, sqt0.Data);
  476. return Result;
  477. }
  478. GLM_FUNC_QUALIFIER detail::fvec4SIMD length4
  479. (
  480. detail::fvec4SIMD const & x
  481. )
  482. {
  483. return sqrt(dot4(x, x));
  484. }
  485. GLM_FUNC_QUALIFIER detail::fvec4SIMD fastLength4
  486. (
  487. detail::fvec4SIMD const & x
  488. )
  489. {
  490. return fastSqrt(dot4(x, x));
  491. }
  492. GLM_FUNC_QUALIFIER detail::fvec4SIMD niceLength4
  493. (
  494. detail::fvec4SIMD const & x
  495. )
  496. {
  497. return niceSqrt(dot4(x, x));
  498. }
  499. GLM_FUNC_QUALIFIER float distance
  500. (
  501. detail::fvec4SIMD const & p0,
  502. detail::fvec4SIMD const & p1
  503. )
  504. {
  505. float Result = 0;
  506. _mm_store_ss(&Result, detail::sse_dst_ps(p0.Data, p1.Data));
  507. return Result;
  508. }
  509. GLM_FUNC_QUALIFIER detail::fvec4SIMD distance4
  510. (
  511. detail::fvec4SIMD const & p0,
  512. detail::fvec4SIMD const & p1
  513. )
  514. {
  515. return detail::sse_dst_ps(p0.Data, p1.Data);
  516. }
  517. GLM_FUNC_QUALIFIER float dot
  518. (
  519. detail::fvec4SIMD const & x,
  520. detail::fvec4SIMD const & y
  521. )
  522. {
  523. float Result = 0;
  524. _mm_store_ss(&Result, detail::sse_dot_ss(x.Data, y.Data));
  525. return Result;
  526. }
  527. GLM_FUNC_QUALIFIER detail::fvec4SIMD dot4
  528. (
  529. detail::fvec4SIMD const & x,
  530. detail::fvec4SIMD const & y
  531. )
  532. {
  533. return detail::sse_dot_ps(x.Data, y.Data);
  534. }
  535. GLM_FUNC_QUALIFIER detail::fvec4SIMD cross
  536. (
  537. detail::fvec4SIMD const & x,
  538. detail::fvec4SIMD const & y
  539. )
  540. {
  541. return detail::sse_xpd_ps(x.Data, y.Data);
  542. }
  543. GLM_FUNC_QUALIFIER detail::fvec4SIMD normalize
  544. (
  545. detail::fvec4SIMD const & x
  546. )
  547. {
  548. __m128 dot0 = detail::sse_dot_ps(x.Data, x.Data);
  549. __m128 isr0 = inversesqrt(detail::fvec4SIMD(dot0)).Data;
  550. __m128 mul0 = _mm_mul_ps(x.Data, isr0);
  551. return mul0;
  552. }
  553. GLM_FUNC_QUALIFIER detail::fvec4SIMD fastNormalize
  554. (
  555. detail::fvec4SIMD const & x
  556. )
  557. {
  558. __m128 dot0 = detail::sse_dot_ps(x.Data, x.Data);
  559. __m128 isr0 = fastInversesqrt(dot0).Data;
  560. __m128 mul0 = _mm_mul_ps(x.Data, isr0);
  561. return mul0;
  562. }
  563. GLM_FUNC_QUALIFIER detail::fvec4SIMD faceforward
  564. (
  565. detail::fvec4SIMD const & N,
  566. detail::fvec4SIMD const & I,
  567. detail::fvec4SIMD const & Nref
  568. )
  569. {
  570. return detail::sse_ffd_ps(N.Data, I.Data, Nref.Data);
  571. }
  572. GLM_FUNC_QUALIFIER detail::fvec4SIMD reflect
  573. (
  574. detail::fvec4SIMD const & I,
  575. detail::fvec4SIMD const & N
  576. )
  577. {
  578. return detail::sse_rfe_ps(I.Data, N.Data);
  579. }
  580. GLM_FUNC_QUALIFIER detail::fvec4SIMD refract
  581. (
  582. detail::fvec4SIMD const & I,
  583. detail::fvec4SIMD const & N,
  584. float const & eta
  585. )
  586. {
  587. return detail::sse_rfa_ps(I.Data, N.Data, _mm_set1_ps(eta));
  588. }
  589. GLM_FUNC_QUALIFIER detail::fvec4SIMD sqrt(detail::fvec4SIMD const & x)
  590. {
  591. return _mm_mul_ps(inversesqrt(x).Data, x.Data);
  592. }
  593. GLM_FUNC_QUALIFIER detail::fvec4SIMD niceSqrt(detail::fvec4SIMD const & x)
  594. {
  595. return _mm_sqrt_ps(x.Data);
  596. }
  597. GLM_FUNC_QUALIFIER detail::fvec4SIMD fastSqrt(detail::fvec4SIMD const & x)
  598. {
  599. return _mm_mul_ps(fastInversesqrt(x.Data).Data, x.Data);
  600. }
  601. // SSE scalar reciprocal sqrt using rsqrt op, plus one Newton-Rhaphson iteration
  602. // By Elan Ruskin, http://assemblyrequired.crashworks.org/
  603. GLM_FUNC_QUALIFIER detail::fvec4SIMD inversesqrt(detail::fvec4SIMD const & x)
  604. {
  605. GLM_ALIGN(4) static const __m128 three = {3, 3, 3, 3}; // aligned consts for fast load
  606. GLM_ALIGN(4) static const __m128 half = {0.5,0.5,0.5,0.5};
  607. __m128 recip = _mm_rsqrt_ps(x.Data); // "estimate" opcode
  608. __m128 halfrecip = _mm_mul_ps(half, recip);
  609. __m128 threeminus_xrr = _mm_sub_ps(three, _mm_mul_ps(x.Data, _mm_mul_ps(recip, recip)));
  610. return _mm_mul_ps(halfrecip, threeminus_xrr);
  611. }
  612. GLM_FUNC_QUALIFIER detail::fvec4SIMD fastInversesqrt(detail::fvec4SIMD const & x)
  613. {
  614. return _mm_rsqrt_ps(x.Data);
  615. }
  616. }//namespace glm