square_amd64.s 2.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132
  1. // Copyright 2012 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. // This code was translated into a form compatible with 6a from the public
  5. // domain sources in SUPERCOP: http://bench.cr.yp.to/supercop.html
  6. // +build amd64,!gccgo,!appengine
  7. #include "const_amd64.h"
  8. // func square(out, in *[5]uint64)
  // square reads five 64-bit words a0..a4 from in and stores five 64-bit
  // words to out. It accumulates the following 128-bit sums (high:low):
  //
  //   h0 = a0*a0   + 38*a1*a4 + 38*a2*a3   in R8:CX
  //   h1 = 2*a0*a1 + 38*a2*a4 + 19*a3*a3   in R10:R9
  //   h2 = 2*a0*a2 + a1*a1    + 38*a3*a4   in R12:R11
  //   h3 = 2*a0*a3 + 2*a1*a2  + 19*a4*a4   in R14:R13
  //   h4 = 2*a0*a4 + 2*a1*a3  + a2*a2      in BX:R15
  //
  // Each sum is then split at bit 51, the upper parts are carried into the
  // next word, and the carry out of the top word is multiplied by 19 and
  // added back into word 0.
  //
  // NOTE(review): the split at bit 51 and the factors 19/38 look like
  // radix-2^51 arithmetic modulo 2^255-19 — confirm against the package docs.
  // NOTE(review): REDMASK51 comes from const_amd64.h, which is not visible
  // here; presumably it is 2^51-1 — confirm.
  // NOTE(review): the doubled (SHLQ $1) and scaled (IMUL3Q $19/$38) operands
  // are formed in 64 bits before the widening MULQ, so the formulas above
  // assume the input words are small enough not to overflow — TODO confirm
  // the caller's bound.
  //
  // Registers written: AX, BX, CX, DX, SI, DI, R8-R15, and the flags.
  //
  // TEXT operands: frame size 0, 16 bytes of arguments (the two pointers).
  // The flag word is given numerically as 7; presumably
  // NOPROF|DUPOK|NOSPLIT from textflag.h — confirm.
  9. TEXT ·square(SB),7,$0-16
  10. MOVQ out+0(FP), DI // DI = out
  11. MOVQ in+8(FP), SI // SI = in
  // h0 (R8:CX) = a0*a0. MULQ leaves the 128-bit product in DX:AX.
  12. MOVQ 0(SI),AX
  13. MULQ 0(SI)
  14. MOVQ AX,CX
  15. MOVQ DX,R8
  // h1 (R10:R9) = 2*a0*a1. The cross term is doubled by shifting a0 first.
  16. MOVQ 0(SI),AX
  17. SHLQ $1,AX
  18. MULQ 8(SI)
  19. MOVQ AX,R9
  20. MOVQ DX,R10
  // h2 (R12:R11) = 2*a0*a2
  21. MOVQ 0(SI),AX
  22. SHLQ $1,AX
  23. MULQ 16(SI)
  24. MOVQ AX,R11
  25. MOVQ DX,R12
  // h3 (R14:R13) = 2*a0*a3
  26. MOVQ 0(SI),AX
  27. SHLQ $1,AX
  28. MULQ 24(SI)
  29. MOVQ AX,R13
  30. MOVQ DX,R14
  // h4 (BX:R15) = 2*a0*a4
  31. MOVQ 0(SI),AX
  32. SHLQ $1,AX
  33. MULQ 32(SI)
  34. MOVQ AX,R15
  35. MOVQ DX,BX
  // h2 += a1*a1. ADDQ/ADCQ form a 128-bit add: the carry from the low word
  // must reach the high word, so nothing that alters flags may sit between.
  36. MOVQ 8(SI),AX
  37. MULQ 8(SI)
  38. ADDQ AX,R11
  39. ADCQ DX,R12
  // h3 += 2*a1*a2
  40. MOVQ 8(SI),AX
  41. SHLQ $1,AX
  42. MULQ 16(SI)
  43. ADDQ AX,R13
  44. ADCQ DX,R14
  // h4 += 2*a1*a3
  45. MOVQ 8(SI),AX
  46. SHLQ $1,AX
  47. MULQ 24(SI)
  48. ADDQ AX,R15
  49. ADCQ DX,BX
  // h0 += 38*a1*a4. From here on the products wrap around to lower words,
  // scaled by 38 (doubled cross terms) or 19 (squares).
  50. MOVQ 8(SI),DX
  51. IMUL3Q $38,DX,AX // AX = 38*a1
  52. MULQ 32(SI)
  53. ADDQ AX,CX
  54. ADCQ DX,R8
  // h4 += a2*a2
  55. MOVQ 16(SI),AX
  56. MULQ 16(SI)
  57. ADDQ AX,R15
  58. ADCQ DX,BX
  // h0 += 38*a2*a3
  59. MOVQ 16(SI),DX
  60. IMUL3Q $38,DX,AX // AX = 38*a2
  61. MULQ 24(SI)
  62. ADDQ AX,CX
  63. ADCQ DX,R8
  // h1 += 38*a2*a4
  64. MOVQ 16(SI),DX
  65. IMUL3Q $38,DX,AX // AX = 38*a2
  66. MULQ 32(SI)
  67. ADDQ AX,R9
  68. ADCQ DX,R10
  // h1 += 19*a3*a3
  69. MOVQ 24(SI),DX
  70. IMUL3Q $19,DX,AX // AX = 19*a3
  71. MULQ 24(SI)
  72. ADDQ AX,R9
  73. ADCQ DX,R10
  // h2 += 38*a3*a4
  74. MOVQ 24(SI),DX
  75. IMUL3Q $38,DX,AX // AX = 38*a3
  76. MULQ 32(SI)
  77. ADDQ AX,R11
  78. ADCQ DX,R12
  // h3 += 19*a4*a4
  79. MOVQ 32(SI),DX
  80. IMUL3Q $19,DX,AX // AX = 19*a4
  81. MULQ 32(SI)
  82. ADDQ AX,R13
  83. ADCQ DX,R14
  // First reduction pass. The input is no longer needed, so SI is reused to
  // hold the mask. For each pair hi:lo the double-register shift by 13
  // (= 64-51) moves the bits of the sum from bit 51 upward into hi; lo is
  // then masked, and hi is added into the next word as its carry.
  84. MOVQ $REDMASK51,SI
  85. SHLQ $13,R8:CX // R8 = h0 >> 51
  86. ANDQ SI,CX // CX = h0 & mask
  87. SHLQ $13,R10:R9 // R10 = h1 >> 51
  88. ANDQ SI,R9
  89. ADDQ R8,R9 // word 1 += carry from h0
  90. SHLQ $13,R12:R11 // R12 = h2 >> 51
  91. ANDQ SI,R11
  92. ADDQ R10,R11 // word 2 += carry from h1
  93. SHLQ $13,R14:R13 // R14 = h3 >> 51
  94. ANDQ SI,R13
  95. ADDQ R12,R13 // word 3 += carry from h2
  96. SHLQ $13,BX:R15 // BX = h4 >> 51
  97. ANDQ SI,R15
  98. ADDQ R14,R15 // word 4 += carry from h3
  // The carry out of the top word wraps to word 0, multiplied by 19.
  99. IMUL3Q $19,BX,DX
  100. ADDQ DX,CX
  // Second pass: a sequential 64-bit carry chain. DX carries the running
  // value; each word is copied to its output register, DX is shifted down
  // by 51 and added to the next word, and the copy is masked.
  101. MOVQ CX,DX
  102. SHRQ $51,DX
  103. ADDQ R9,DX // DX = word 1 + carry from word 0
  104. ANDQ SI,CX // CX = word 0
  105. MOVQ DX,R8
  106. SHRQ $51,DX
  107. ADDQ R11,DX // DX = word 2 + carry from word 1
  108. ANDQ SI,R8 // R8 = word 1
  109. MOVQ DX,R9
  110. SHRQ $51,DX
  111. ADDQ R13,DX // DX = word 3 + carry from word 2
  112. ANDQ SI,R9 // R9 = word 2
  113. MOVQ DX,AX
  114. SHRQ $51,DX
  115. ADDQ R15,DX // DX = word 4 + carry from word 3
  116. ANDQ SI,AX // AX = word 3
  117. MOVQ DX,R10
  118. SHRQ $51,DX
  // The carry out of word 4 is again multiplied by 19 and added to word 0.
  // CX is not masked after this add; it is stored as is.
  119. IMUL3Q $19,DX,DX
  120. ADDQ DX,CX
  121. ANDQ SI,R10 // R10 = word 4
  // Store the five result words to out.
  122. MOVQ CX,0(DI)
  123. MOVQ R8,8(DI)
  124. MOVQ R9,16(DI)
  125. MOVQ AX,24(DI)
  126. MOVQ R10,32(DI)
  127. RET