nosimd.go 2.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116
  1. // Copyright 2013 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package xor
  5. import (
  6. "runtime"
  7. "unsafe"
  8. )
  9. const wordSize = int(unsafe.Sizeof(uintptr(0)))
  10. const supportsUnaligned = runtime.GOARCH == "386" || runtime.GOARCH == "amd64" || runtime.GOARCH == "ppc64" || runtime.GOARCH == "ppc64le" || runtime.GOARCH == "s390x"
  11. // xor the bytes in a and b. The destination is assumed to have enough space.
  12. func bytesNoSIMD(dst, a, b []byte, size int) {
  13. if supportsUnaligned {
  14. fastXORBytes(dst, a, b, size)
  15. } else {
  16. // TODO(hanwen): if (dst, a, b) have common alignment
  17. // we could still try fastXORBytes. It is not clear
  18. // how often this happens, and it's only worth it if
  19. // the block encryption itself is hardware
  20. // accelerated.
  21. safeXORBytes(dst, a, b, size)
  22. }
  23. }
  24. // split slice for cache-friendly
  25. const unitSize = 16 * 1024
  26. func matrixNoSIMD(dst []byte, src [][]byte) {
  27. size := len(src[0])
  28. start := 0
  29. do := unitSize
  30. for start < size {
  31. end := start + do
  32. if end <= size {
  33. partNoSIMD(start, end, dst, src)
  34. start = start + do
  35. } else {
  36. partNoSIMD(start, size, dst, src)
  37. start = size
  38. }
  39. }
  40. }
  41. // split vect will improve performance with big data by reducing cache pollution
  42. func partNoSIMD(start, end int, dst []byte, src [][]byte) {
  43. bytesNoSIMD(dst[start:end], src[0][start:end], src[1][start:end], end-start)
  44. for i := 2; i < len(src); i++ {
  45. bytesNoSIMD(dst[start:end], dst[start:end], src[i][start:end], end-start)
  46. }
  47. }
  48. // fastXORBytes xor in bulk. It only works on architectures that
  49. // support unaligned read/writes.
  50. func fastXORBytes(dst, a, b []byte, n int) {
  51. w := n / wordSize
  52. if w > 0 {
  53. wordBytes := w * wordSize
  54. fastXORWords(dst[:wordBytes], a[:wordBytes], b[:wordBytes])
  55. }
  56. for i := n - n%wordSize; i < n; i++ {
  57. dst[i] = a[i] ^ b[i]
  58. }
  59. }
  60. func safeXORBytes(dst, a, b []byte, n int) {
  61. ex := n % 8
  62. for i := 0; i < ex; i++ {
  63. dst[i] = a[i] ^ b[i]
  64. }
  65. for i := ex; i < n; i += 8 {
  66. _dst := dst[i : i+8]
  67. _a := a[i : i+8]
  68. _b := b[i : i+8]
  69. _dst[0] = _a[0] ^ _b[0]
  70. _dst[1] = _a[1] ^ _b[1]
  71. _dst[2] = _a[2] ^ _b[2]
  72. _dst[3] = _a[3] ^ _b[3]
  73. _dst[4] = _a[4] ^ _b[4]
  74. _dst[5] = _a[5] ^ _b[5]
  75. _dst[6] = _a[6] ^ _b[6]
  76. _dst[7] = _a[7] ^ _b[7]
  77. }
  78. }
  79. // fastXORWords XORs multiples of 4 or 8 bytes (depending on architecture.)
  80. // The arguments are assumed to be of equal length.
// fastXORWords XORs multiples of 4 or 8 bytes (depending on architecture.)
// The arguments are assumed to be of equal length.
func fastXORWords(dst, a, b []byte) {
	// Reinterpret each byte slice header as a []uintptr header so the
	// loop below moves one machine word per element instead of one byte.
	// NOTE(review): the resulting headers keep the original len/cap (in
	// bytes, not words); that is harmless because only the first n
	// elements are touched. This slice-header punning predates
	// unsafe.Slice and relies on the two header layouts matching —
	// confirm it is still acceptable for the toolchains this package
	// must support.
	dw := *(*[]uintptr)(unsafe.Pointer(&dst))
	aw := *(*[]uintptr)(unsafe.Pointer(&a))
	bw := *(*[]uintptr)(unsafe.Pointer(&b))
	// Number of whole words to process, derived from the byte length.
	n := len(b) / wordSize
	// Handle the leading n%8 words singly so the unrolled loop below
	// always operates on complete groups of eight words.
	ex := n % 8
	for i := 0; i < ex; i++ {
		dw[i] = aw[i] ^ bw[i]
	}
	// Manually unrolled main loop: eight word-XORs per iteration.
	for i := ex; i < n; i += 8 {
		_dw := dw[i : i+8]
		_aw := aw[i : i+8]
		_bw := bw[i : i+8]
		_dw[0] = _aw[0] ^ _bw[0]
		_dw[1] = _aw[1] ^ _bw[1]
		_dw[2] = _aw[2] ^ _bw[2]
		_dw[3] = _aw[3] ^ _bw[3]
		_dw[4] = _aw[4] ^ _bw[4]
		_dw[5] = _aw[5] ^ _bw[5]
		_dw[6] = _aw[6] ^ _bw[6]
		_dw[7] = _aw[7] ^ _bw[7]
	}
}