1
0

rs_amd64.go 18 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868
  1. package reedsolomon
  2. import (
  3. "errors"
  4. "sync"
  5. "github.com/templexxx/cpufeat"
  6. )
// SIMD Instruction Extensions.
// The level in use is detected once at package init (see getEXT) and
// decides which encoder implementation newRS constructs.
const (
	none  = iota // no usable SIMD; fall back to the pure-Go encBase
	avx2         // AVX2 kernels (preferred)
	ssse3        // SSSE3 kernels
)

// extension holds the best SIMD level available on this CPU.
var extension = none
// init detects the CPU's SIMD capability once at package load time.
func init() {
	getEXT()
}
  17. func getEXT() {
  18. if cpufeat.X86.HasAVX2 {
  19. extension = avx2
  20. return
  21. } else if cpufeat.X86.HasSSSE3 {
  22. extension = ssse3
  23. return
  24. } else {
  25. extension = none
  26. return
  27. }
  28. }
// copy32B copies exactly 32 bytes from src to dst.
// Implemented in assembly; needs SSE2 (introduced in 2001).
//go:noescape
func copy32B(dst, src []byte) // Need SSE2(introduced in 2001)
  31. func initTbl(g matrix, rows, cols int, tbl []byte) {
  32. off := 0
  33. for i := 0; i < cols; i++ {
  34. for j := 0; j < rows; j++ {
  35. c := g[j*cols+i]
  36. t := lowhighTbl[c][:]
  37. copy32B(tbl[off:off+32], t)
  38. off += 32
  39. }
  40. }
  41. }
  42. // At most 3060 inverse matrix (when data=14, parity=4, calc by mathtool/cntinverse)
  43. // In practice, data usually below 12, parity below 5
  44. func okCache(data, parity int) bool {
  45. if data < 15 && parity < 5 { // you can change it, but the data+parity can't be bigger than 32 (tips: see the codes about make inverse matrix)
  46. return true
  47. }
  48. return false
  49. }
type (
	// encSSSE3 is the SSSE3-accelerated encoder.
	encSSSE3 encSIMD
	// encAVX2 is the AVX2-accelerated encoder.
	encAVX2 encSIMD
	// encSIMD is the state shared by both SIMD encoder variants.
	encSIMD struct {
		data   int    // number of data vectors
		parity int    // number of parity vectors
		encode matrix // full encoding matrix; last parity rows form gen
		gen    matrix // generator rows used to produce parity
		tbl    []byte // precomputed nibble tables for gen (32 B per coefficient)
		// inverse matrix cache is design for small vect size ( < 4KB )
		// it will save time for calculating inverse matrix
		// but it's not so important for big vect size
		enableCache  bool
		inverseCache iCache
	}
	// iCache is an RWMutex-guarded cache of inverse matrices, keyed by a
	// bitmap of the surviving vector positions (see makeGen).
	iCache struct {
		sync.RWMutex
		data map[uint32][]byte
	}
)
  70. func newRS(d, p int, em matrix) (enc Encoder) {
  71. g := em[d*d:]
  72. if extension == none {
  73. return &encBase{data: d, parity: p, encode: em, gen: g}
  74. }
  75. t := make([]byte, d*p*32)
  76. initTbl(g, p, d, t)
  77. ok := okCache(d, p)
  78. if extension == avx2 {
  79. e := &encAVX2{data: d, parity: p, encode: em, gen: g, tbl: t, enableCache: ok,
  80. inverseCache: iCache{data: make(map[uint32][]byte)}}
  81. return e
  82. }
  83. e := &encSSSE3{data: d, parity: p, encode: em, gen: g, tbl: t, enableCache: ok,
  84. inverseCache: iCache{data: make(map[uint32][]byte)}}
  85. return e
  86. }
  87. // Size of sub-vector
  88. const unit int = 16 * 1024
  89. func getDo(n int) int {
  90. if n < unit {
  91. c := n >> 4
  92. if c == 0 {
  93. return unit
  94. }
  95. return c << 4
  96. }
  97. return unit
  98. }
  99. func (e *encAVX2) Encode(vects [][]byte) (err error) {
  100. d := e.data
  101. p := e.parity
  102. size, err := checkEnc(d, p, vects)
  103. if err != nil {
  104. return
  105. }
  106. dv := vects[:d]
  107. pv := vects[d:]
  108. start, end := 0, 0
  109. do := getDo(size)
  110. for start < size {
  111. end = start + do
  112. if end <= size {
  113. e.matrixMul(start, end, dv, pv)
  114. start = end
  115. } else {
  116. e.matrixMulRemain(start, size, dv, pv)
  117. start = size
  118. }
  119. }
  120. return
  121. }
// mulVectAVX2 writes the GF(2^8) product of vector d and the coefficient
// described by the 32-byte nibble table tbl into p; assembly.
//go:noescape
func mulVectAVX2(tbl, d, p []byte)

// mulVectAddAVX2 XOR-accumulates the same product into p; assembly.
//go:noescape
func mulVectAddAVX2(tbl, d, p []byte)
  126. func (e *encAVX2) matrixMul(start, end int, dv, pv [][]byte) {
  127. d := e.data
  128. p := e.parity
  129. tbl := e.tbl
  130. off := 0
  131. for i := 0; i < d; i++ {
  132. for j := 0; j < p; j++ {
  133. t := tbl[off : off+32]
  134. if i != 0 {
  135. mulVectAddAVX2(t, dv[i][start:end], pv[j][start:end])
  136. } else {
  137. mulVectAVX2(t, dv[0][start:end], pv[j][start:end])
  138. }
  139. off += 32
  140. }
  141. }
  142. }
// matrixMulRemain encodes the tail [start:end) of the vectors.
// It first runs the SIMD kernels over the largest 16-byte-aligned prefix,
// then covers the ragged remainder by re-running them on the vector's
// final 16 bytes (recomputing a few bytes, which is still far cheaper
// than scalar code). Only when the whole vector is shorter than 16 bytes
// does it fall back to the byte-wise mulVect/mulVectAdd.
func (e *encAVX2) matrixMulRemain(start, end int, dv, pv [][]byte) {
	undone := end - start
	do := (undone >> 4) << 4 // undone rounded down to a multiple of 16
	d := e.data
	p := e.parity
	tbl := e.tbl
	if do >= 16 {
		end2 := start + do
		off := 0
		for i := 0; i < d; i++ {
			for j := 0; j < p; j++ {
				t := tbl[off : off+32]
				if i != 0 {
					mulVectAddAVX2(t, dv[i][start:end2], pv[j][start:end2])
				} else {
					// first data column initializes the parity bytes
					mulVectAVX2(t, dv[0][start:end2], pv[j][start:end2])
				}
				off += 32
			}
		}
		start = end // local copy only; the passes below use start2 / the untouched start
	}
	if undone > do {
		// may recalculate some data, but still improve a lot
		start2 := end - 16
		if start2 >= 0 {
			off := 0
			for i := 0; i < d; i++ {
				for j := 0; j < p; j++ {
					t := tbl[off : off+32]
					if i != 0 {
						mulVectAddAVX2(t, dv[i][start2:end], pv[j][start2:end])
					} else {
						mulVectAVX2(t, dv[0][start2:end], pv[j][start2:end])
					}
					off += 32
				}
			}
		} else {
			// whole vector < 16 bytes (so do == 0 and start is unchanged):
			// scalar GF(2^8) fallback using the raw generator coefficients
			g := e.gen
			for i := 0; i < d; i++ {
				for j := 0; j < p; j++ {
					if i != 0 {
						mulVectAdd(g[j*d+i], dv[i][start:], pv[j][start:])
					} else {
						mulVect(g[j*d], dv[0][start:], pv[j][start:])
					}
				}
			}
		}
	}
}
  195. // use generator-matrix but not tbls for encoding
  196. // it's design for reconstructing
  197. // for small vects, it cost to much time on initTbl, so drop it
  198. // and for big vects, the tbls can't impact much, because the cache will be filled with vects' data
  199. func (e *encAVX2) encodeGen(vects [][]byte) (err error) {
  200. d := e.data
  201. p := e.parity
  202. size, err := checkEnc(d, p, vects)
  203. if err != nil {
  204. return
  205. }
  206. dv := vects[:d]
  207. pv := vects[d:]
  208. start, end := 0, 0
  209. do := getDo(size)
  210. for start < size {
  211. end = start + do
  212. if end <= size {
  213. e.matrixMulGen(start, end, dv, pv)
  214. start = end
  215. } else {
  216. e.matrixMulRemainGen(start, size, dv, pv)
  217. start = size
  218. }
  219. }
  220. return
  221. }
  222. func (e *encAVX2) matrixMulGen(start, end int, dv, pv [][]byte) {
  223. d := e.data
  224. p := e.parity
  225. g := e.gen
  226. for i := 0; i < d; i++ {
  227. for j := 0; j < p; j++ {
  228. t := lowhighTbl[g[j*d+i]][:]
  229. if i != 0 {
  230. mulVectAddAVX2(t, dv[i][start:end], pv[j][start:end])
  231. } else {
  232. mulVectAVX2(t, dv[0][start:end], pv[j][start:end])
  233. }
  234. }
  235. }
  236. }
// matrixMulRemainGen is matrixMulRemain for the generator-matrix path:
// the same aligned-prefix + overlapped-final-16-bytes strategy, but the
// nibble tables are looked up per coefficient from lowhighTbl.
func (e *encAVX2) matrixMulRemainGen(start, end int, dv, pv [][]byte) {
	undone := end - start
	do := (undone >> 4) << 4 // 16-byte-aligned portion of the remainder
	d := e.data
	p := e.parity
	g := e.gen
	if do >= 16 {
		end2 := start + do
		for i := 0; i < d; i++ {
			for j := 0; j < p; j++ {
				t := lowhighTbl[g[j*d+i]][:]
				if i != 0 {
					mulVectAddAVX2(t, dv[i][start:end2], pv[j][start:end2])
				} else {
					mulVectAVX2(t, dv[0][start:end2], pv[j][start:end2])
				}
			}
		}
		start = end // local copy only; the passes below use start2 / the untouched start
	}
	if undone > do {
		// redo the last 16 bytes rather than fall back to scalar code
		start2 := end - 16
		if start2 >= 0 {
			for i := 0; i < d; i++ {
				for j := 0; j < p; j++ {
					t := lowhighTbl[g[j*d+i]][:]
					if i != 0 {
						mulVectAddAVX2(t, dv[i][start2:end], pv[j][start2:end])
					} else {
						mulVectAVX2(t, dv[0][start2:end], pv[j][start2:end])
					}
				}
			}
		} else {
			// vector shorter than 16 bytes: byte-wise fallback
			for i := 0; i < d; i++ {
				for j := 0; j < p; j++ {
					if i != 0 {
						mulVectAdd(g[j*d+i], dv[i][start:], pv[j][start:])
					} else {
						mulVect(g[j*d], dv[0][start:], pv[j][start:])
					}
				}
			}
		}
	}
}
// Reconstruct repairs every lost vector (data and parity) in place,
// locating losses by scanning for nil entries in vects.
func (e *encAVX2) Reconstruct(vects [][]byte) (err error) {
	return e.reconstruct(vects, false)
}

// ReconstructData repairs only the lost data vectors.
func (e *encAVX2) ReconstructData(vects [][]byte) (err error) {
	return e.reconstruct(vects, true)
}

// ReconstWithPos repairs lost vectors at caller-supplied positions:
// has lists the indices of d intact vectors, dLost/pLost the losses.
func (e *encAVX2) ReconstWithPos(vects [][]byte, has, dLost, pLost []int) error {
	return e.reconstWithPos(vects, has, dLost, pLost, false)
}

// ReconstDataWithPos is ReconstWithPos restricted to data vectors.
func (e *encAVX2) ReconstDataWithPos(vects [][]byte, has, dLost []int) error {
	return e.reconstWithPos(vects, has, dLost, nil, true)
}
// makeGen builds the generator rows used to rebuild the data vectors
// listed in dLost from the surviving vectors listed in has.
// With the cache disabled it computes the inverse matrix from scratch in
// one backing buffer; otherwise it first consults inverseCache, keyed by
// a bitmap of surviving positions (one bit each — hence data+parity <= 32,
// see okCache).
func (e *encAVX2) makeGen(has, dLost []int) (gen []byte, err error) {
	d := e.data
	em := e.encode
	cnt := len(dLost)
	if !e.enableCache {
		// single buffer: m (d*d) | invert scratch (2*d*d) | im (d*d) | g (cnt*d)
		matrixbuf := make([]byte, 4*d*d+cnt*d)
		m := matrixbuf[:d*d]
		for i, l := range has {
			copy(m[i*d:i*d+d], em[l*d:l*d+d])
		}
		raw := matrixbuf[d*d : 3*d*d]
		im := matrixbuf[3*d*d : 4*d*d]
		err2 := matrix(m).invert(raw, d, im)
		if err2 != nil {
			return nil, err2
		}
		g := matrixbuf[4*d*d:]
		for i, l := range dLost {
			copy(g[i*d:i*d+d], im[l*d:l*d+d])
		}
		return g, nil
	}
	// cache key: bit p set iff position p survived (positions are unique,
	// so the additions never carry)
	var ikey uint32
	for _, p := range has {
		ikey += 1 << uint8(p)
	}
	e.inverseCache.RLock()
	v, ok := e.inverseCache.data[ikey]
	if ok {
		// cache hit: copy the needed rows out while holding the read lock
		im := v
		g := make([]byte, cnt*d)
		for i, l := range dLost {
			copy(g[i*d:i*d+d], im[l*d:l*d+d])
		}
		e.inverseCache.RUnlock()
		return g, nil
	}
	e.inverseCache.RUnlock()
	// cache miss: compute outside the lock (concurrent callers may
	// duplicate work; results are identical, last writer wins)
	matrixbuf := make([]byte, 4*d*d+cnt*d)
	m := matrixbuf[:d*d]
	for i, l := range has {
		copy(m[i*d:i*d+d], em[l*d:l*d+d])
	}
	raw := matrixbuf[d*d : 3*d*d]
	im := matrixbuf[3*d*d : 4*d*d]
	err2 := matrix(m).invert(raw, d, im)
	if err2 != nil {
		return nil, err2
	}
	e.inverseCache.Lock()
	e.inverseCache.data[ikey] = im
	e.inverseCache.Unlock()
	g := matrixbuf[4*d*d:]
	for i, l := range dLost {
		copy(g[i*d:i*d+d], im[l*d:l*d+d])
	}
	return g, nil
}
  353. func (e *encAVX2) reconst(vects [][]byte, has, dLost, pLost []int, dataOnly bool) (err error) {
  354. d := e.data
  355. em := e.encode
  356. dCnt := len(dLost)
  357. size := len(vects[has[0]])
  358. if dCnt != 0 {
  359. vtmp := make([][]byte, d+dCnt)
  360. for i, p := range has {
  361. vtmp[i] = vects[p]
  362. }
  363. for i, p := range dLost {
  364. if len(vects[p]) == 0 {
  365. vects[p] = make([]byte, size)
  366. }
  367. vtmp[i+d] = vects[p]
  368. }
  369. g, err2 := e.makeGen(has, dLost)
  370. if err2 != nil {
  371. return
  372. }
  373. etmp := &encAVX2{data: d, parity: dCnt, gen: g}
  374. err2 = etmp.encodeGen(vtmp)
  375. if err2 != nil {
  376. return err2
  377. }
  378. }
  379. if dataOnly {
  380. return
  381. }
  382. pCnt := len(pLost)
  383. if pCnt != 0 {
  384. g := make([]byte, pCnt*d)
  385. for i, l := range pLost {
  386. copy(g[i*d:i*d+d], em[l*d:l*d+d])
  387. }
  388. vtmp := make([][]byte, d+pCnt)
  389. for i := 0; i < d; i++ {
  390. vtmp[i] = vects[i]
  391. }
  392. for i, p := range pLost {
  393. if len(vects[p]) == 0 {
  394. vects[p] = make([]byte, size)
  395. }
  396. vtmp[i+d] = vects[p]
  397. }
  398. etmp := &encAVX2{data: d, parity: pCnt, gen: g}
  399. err2 := etmp.encodeGen(vtmp)
  400. if err2 != nil {
  401. return err2
  402. }
  403. }
  404. return
  405. }
  406. func (e *encAVX2) reconstWithPos(vects [][]byte, has, dLost, pLost []int, dataOnly bool) (err error) {
  407. d := e.data
  408. p := e.parity
  409. if len(has) != d {
  410. return errors.New("rs.Reconst: not enough vects")
  411. }
  412. dCnt := len(dLost)
  413. if dCnt > p {
  414. return errors.New("rs.Reconst: not enough vects")
  415. }
  416. pCnt := len(pLost)
  417. if pCnt > p {
  418. return errors.New("rs.Reconst: not enough vects")
  419. }
  420. return e.reconst(vects, has, dLost, pLost, dataOnly)
  421. }
  422. func (e *encAVX2) reconstruct(vects [][]byte, dataOnly bool) (err error) {
  423. d := e.data
  424. p := e.parity
  425. t := d + p
  426. listBuf := make([]int, t+p)
  427. has := listBuf[:d]
  428. dLost := listBuf[d:t]
  429. pLost := listBuf[t : t+p]
  430. hasCnt, dCnt, pCnt := 0, 0, 0
  431. for i := 0; i < t; i++ {
  432. if vects[i] != nil {
  433. if hasCnt < d {
  434. has[hasCnt] = i
  435. hasCnt++
  436. }
  437. } else {
  438. if i < d {
  439. if dCnt < p {
  440. dLost[dCnt] = i
  441. dCnt++
  442. } else {
  443. return errors.New("rs.Reconst: not enough vects")
  444. }
  445. } else {
  446. if pCnt < p {
  447. pLost[pCnt] = i
  448. pCnt++
  449. } else {
  450. return errors.New("rs.Reconst: not enough vects")
  451. }
  452. }
  453. }
  454. }
  455. if hasCnt != d {
  456. return errors.New("rs.Reconst: not enough vects")
  457. }
  458. dLost = dLost[:dCnt]
  459. pLost = pLost[:pCnt]
  460. return e.reconst(vects, has, dLost, pLost, dataOnly)
  461. }
  462. func (e *encSSSE3) Encode(vects [][]byte) (err error) {
  463. d := e.data
  464. p := e.parity
  465. size, err := checkEnc(d, p, vects)
  466. if err != nil {
  467. return
  468. }
  469. dv := vects[:d]
  470. pv := vects[d:]
  471. start, end := 0, 0
  472. do := getDo(size)
  473. for start < size {
  474. end = start + do
  475. if end <= size {
  476. e.matrixMul(start, end, dv, pv)
  477. start = end
  478. } else {
  479. e.matrixMulRemain(start, size, dv, pv)
  480. start = size
  481. }
  482. }
  483. return
  484. }
// mulVectSSSE3 writes the GF(2^8) product of vector d and the coefficient
// described by the 32-byte nibble table tbl into p; assembly.
//go:noescape
func mulVectSSSE3(tbl, d, p []byte)

// mulVectAddSSSE3 XOR-accumulates the same product into p; assembly.
//go:noescape
func mulVectAddSSSE3(tbl, d, p []byte)
  489. func (e *encSSSE3) matrixMul(start, end int, dv, pv [][]byte) {
  490. d := e.data
  491. p := e.parity
  492. tbl := e.tbl
  493. off := 0
  494. for i := 0; i < d; i++ {
  495. for j := 0; j < p; j++ {
  496. t := tbl[off : off+32]
  497. if i != 0 {
  498. mulVectAddSSSE3(t, dv[i][start:end], pv[j][start:end])
  499. } else {
  500. mulVectSSSE3(t, dv[0][start:end], pv[j][start:end])
  501. }
  502. off += 32
  503. }
  504. }
  505. }
// matrixMulRemain encodes the tail [start:end) of the vectors.
// It first runs the SIMD kernels over the largest 16-byte-aligned prefix,
// then covers the ragged remainder by re-running them on the vector's
// final 16 bytes (recomputing a few bytes, which is still far cheaper
// than scalar code). Only when the whole vector is shorter than 16 bytes
// does it fall back to the byte-wise mulVect/mulVectAdd.
func (e *encSSSE3) matrixMulRemain(start, end int, dv, pv [][]byte) {
	undone := end - start
	do := (undone >> 4) << 4 // undone rounded down to a multiple of 16
	d := e.data
	p := e.parity
	tbl := e.tbl
	if do >= 16 {
		end2 := start + do
		off := 0
		for i := 0; i < d; i++ {
			for j := 0; j < p; j++ {
				t := tbl[off : off+32]
				if i != 0 {
					mulVectAddSSSE3(t, dv[i][start:end2], pv[j][start:end2])
				} else {
					// first data column initializes the parity bytes
					mulVectSSSE3(t, dv[0][start:end2], pv[j][start:end2])
				}
				off += 32
			}
		}
		start = end // local copy only; the passes below use start2 / the untouched start
	}
	if undone > do {
		// overlapped pass may recalculate some bytes, but stays SIMD
		start2 := end - 16
		if start2 >= 0 {
			off := 0
			for i := 0; i < d; i++ {
				for j := 0; j < p; j++ {
					t := tbl[off : off+32]
					if i != 0 {
						mulVectAddSSSE3(t, dv[i][start2:end], pv[j][start2:end])
					} else {
						mulVectSSSE3(t, dv[0][start2:end], pv[j][start2:end])
					}
					off += 32
				}
			}
		} else {
			// whole vector < 16 bytes (so do == 0 and start is unchanged):
			// scalar GF(2^8) fallback using the raw generator coefficients
			g := e.gen
			for i := 0; i < d; i++ {
				for j := 0; j < p; j++ {
					if i != 0 {
						mulVectAdd(g[j*d+i], dv[i][start:], pv[j][start:])
					} else {
						mulVect(g[j*d], dv[0][start:], pv[j][start:])
					}
				}
			}
		}
	}
}
  557. // use generator-matrix but not tbls for encoding
  558. // it's design for reconstructing
  559. // for small vects, it cost to much time on initTbl, so drop it
  560. // and for big vects, the tbls can't impact much, because the cache will be filled with vects' data
  561. func (e *encSSSE3) encodeGen(vects [][]byte) (err error) {
  562. d := e.data
  563. p := e.parity
  564. size, err := checkEnc(d, p, vects)
  565. if err != nil {
  566. return
  567. }
  568. dv := vects[:d]
  569. pv := vects[d:]
  570. start, end := 0, 0
  571. do := getDo(size)
  572. for start < size {
  573. end = start + do
  574. if end <= size {
  575. e.matrixMulGen(start, end, dv, pv)
  576. start = end
  577. } else {
  578. e.matrixMulRemainGen(start, size, dv, pv)
  579. start = size
  580. }
  581. }
  582. return
  583. }
  584. func (e *encSSSE3) matrixMulGen(start, end int, dv, pv [][]byte) {
  585. d := e.data
  586. p := e.parity
  587. g := e.gen
  588. for i := 0; i < d; i++ {
  589. for j := 0; j < p; j++ {
  590. t := lowhighTbl[g[j*d+i]][:]
  591. if i != 0 {
  592. mulVectAddSSSE3(t, dv[i][start:end], pv[j][start:end])
  593. } else {
  594. mulVectSSSE3(t, dv[0][start:end], pv[j][start:end])
  595. }
  596. }
  597. }
  598. }
// matrixMulRemainGen is matrixMulRemain for the generator-matrix path:
// the same aligned-prefix + overlapped-final-16-bytes strategy, but the
// nibble tables are looked up per coefficient from lowhighTbl.
func (e *encSSSE3) matrixMulRemainGen(start, end int, dv, pv [][]byte) {
	undone := end - start
	do := (undone >> 4) << 4 // 16-byte-aligned portion of the remainder
	d := e.data
	p := e.parity
	g := e.gen
	if do >= 16 {
		end2 := start + do
		for i := 0; i < d; i++ {
			for j := 0; j < p; j++ {
				t := lowhighTbl[g[j*d+i]][:]
				if i != 0 {
					mulVectAddSSSE3(t, dv[i][start:end2], pv[j][start:end2])
				} else {
					mulVectSSSE3(t, dv[0][start:end2], pv[j][start:end2])
				}
			}
		}
		start = end // local copy only; the passes below use start2 / the untouched start
	}
	if undone > do {
		// redo the last 16 bytes rather than fall back to scalar code
		start2 := end - 16
		if start2 >= 0 {
			for i := 0; i < d; i++ {
				for j := 0; j < p; j++ {
					t := lowhighTbl[g[j*d+i]][:]
					if i != 0 {
						mulVectAddSSSE3(t, dv[i][start2:end], pv[j][start2:end])
					} else {
						mulVectSSSE3(t, dv[0][start2:end], pv[j][start2:end])
					}
				}
			}
		} else {
			// vector shorter than 16 bytes: byte-wise fallback
			for i := 0; i < d; i++ {
				for j := 0; j < p; j++ {
					if i != 0 {
						mulVectAdd(g[j*d+i], dv[i][start:], pv[j][start:])
					} else {
						mulVect(g[j*d], dv[0][start:], pv[j][start:])
					}
				}
			}
		}
	}
}
// Reconstruct repairs every lost vector (data and parity) in place,
// locating losses by scanning for nil entries in vects.
func (e *encSSSE3) Reconstruct(vects [][]byte) (err error) {
	return e.reconstruct(vects, false)
}

// ReconstructData repairs only the lost data vectors.
func (e *encSSSE3) ReconstructData(vects [][]byte) (err error) {
	return e.reconstruct(vects, true)
}

// ReconstWithPos repairs lost vectors at caller-supplied positions:
// has lists the indices of d intact vectors, dLost/pLost the losses.
func (e *encSSSE3) ReconstWithPos(vects [][]byte, has, dLost, pLost []int) error {
	return e.reconstWithPos(vects, has, dLost, pLost, false)
}

// ReconstDataWithPos is ReconstWithPos restricted to data vectors.
func (e *encSSSE3) ReconstDataWithPos(vects [][]byte, has, dLost []int) error {
	return e.reconstWithPos(vects, has, dLost, nil, true)
}
// makeGen builds the generator rows used to rebuild the data vectors
// listed in dLost from the surviving vectors listed in has.
// With the cache disabled it computes the inverse matrix from scratch in
// one backing buffer; otherwise it first consults inverseCache, keyed by
// a bitmap of surviving positions (one bit each — hence data+parity <= 32,
// see okCache).
func (e *encSSSE3) makeGen(has, dLost []int) (gen []byte, err error) {
	d := e.data
	em := e.encode
	cnt := len(dLost)
	if !e.enableCache {
		// single buffer: m (d*d) | invert scratch (2*d*d) | im (d*d) | g (cnt*d)
		matrixbuf := make([]byte, 4*d*d+cnt*d)
		m := matrixbuf[:d*d]
		for i, l := range has {
			copy(m[i*d:i*d+d], em[l*d:l*d+d])
		}
		raw := matrixbuf[d*d : 3*d*d]
		im := matrixbuf[3*d*d : 4*d*d]
		err2 := matrix(m).invert(raw, d, im)
		if err2 != nil {
			return nil, err2
		}
		g := matrixbuf[4*d*d:]
		for i, l := range dLost {
			copy(g[i*d:i*d+d], im[l*d:l*d+d])
		}
		return g, nil
	}
	// cache key: bit p set iff position p survived (positions are unique,
	// so the additions never carry)
	var ikey uint32
	for _, p := range has {
		ikey += 1 << uint8(p)
	}
	e.inverseCache.RLock()
	v, ok := e.inverseCache.data[ikey]
	if ok {
		// cache hit: copy the needed rows out while holding the read lock
		im := v
		g := make([]byte, cnt*d)
		for i, l := range dLost {
			copy(g[i*d:i*d+d], im[l*d:l*d+d])
		}
		e.inverseCache.RUnlock()
		return g, nil
	}
	e.inverseCache.RUnlock()
	// cache miss: compute outside the lock (concurrent callers may
	// duplicate work; results are identical, last writer wins)
	matrixbuf := make([]byte, 4*d*d+cnt*d)
	m := matrixbuf[:d*d]
	for i, l := range has {
		copy(m[i*d:i*d+d], em[l*d:l*d+d])
	}
	raw := matrixbuf[d*d : 3*d*d]
	im := matrixbuf[3*d*d : 4*d*d]
	err2 := matrix(m).invert(raw, d, im)
	if err2 != nil {
		return nil, err2
	}
	e.inverseCache.Lock()
	e.inverseCache.data[ikey] = im
	e.inverseCache.Unlock()
	g := matrixbuf[4*d*d:]
	for i, l := range dLost {
		copy(g[i*d:i*d+d], im[l*d:l*d+d])
	}
	return g, nil
}
  715. func (e *encSSSE3) reconst(vects [][]byte, has, dLost, pLost []int, dataOnly bool) (err error) {
  716. d := e.data
  717. em := e.encode
  718. dCnt := len(dLost)
  719. size := len(vects[has[0]])
  720. if dCnt != 0 {
  721. vtmp := make([][]byte, d+dCnt)
  722. for i, p := range has {
  723. vtmp[i] = vects[p]
  724. }
  725. for i, p := range dLost {
  726. if len(vects[p]) == 0 {
  727. vects[p] = make([]byte, size)
  728. }
  729. vtmp[i+d] = vects[p]
  730. }
  731. g, err2 := e.makeGen(has, dLost)
  732. if err2 != nil {
  733. return
  734. }
  735. etmp := &encSSSE3{data: d, parity: dCnt, gen: g}
  736. err2 = etmp.encodeGen(vtmp)
  737. if err2 != nil {
  738. return err2
  739. }
  740. }
  741. if dataOnly {
  742. return
  743. }
  744. pCnt := len(pLost)
  745. if pCnt != 0 {
  746. g := make([]byte, pCnt*d)
  747. for i, l := range pLost {
  748. copy(g[i*d:i*d+d], em[l*d:l*d+d])
  749. }
  750. vtmp := make([][]byte, d+pCnt)
  751. for i := 0; i < d; i++ {
  752. vtmp[i] = vects[i]
  753. }
  754. for i, p := range pLost {
  755. if len(vects[p]) == 0 {
  756. vects[p] = make([]byte, size)
  757. }
  758. vtmp[i+d] = vects[p]
  759. }
  760. etmp := &encSSSE3{data: d, parity: pCnt, gen: g}
  761. err2 := etmp.encodeGen(vtmp)
  762. if err2 != nil {
  763. return err2
  764. }
  765. }
  766. return
  767. }
  768. func (e *encSSSE3) reconstWithPos(vects [][]byte, has, dLost, pLost []int, dataOnly bool) (err error) {
  769. d := e.data
  770. p := e.parity
  771. if len(has) != d {
  772. return errors.New("rs.Reconst: not enough vects")
  773. }
  774. dCnt := len(dLost)
  775. if dCnt > p {
  776. return errors.New("rs.Reconst: not enough vects")
  777. }
  778. pCnt := len(pLost)
  779. if pCnt > p {
  780. return errors.New("rs.Reconst: not enough vects")
  781. }
  782. return e.reconst(vects, has, dLost, pLost, dataOnly)
  783. }
  784. func (e *encSSSE3) reconstruct(vects [][]byte, dataOnly bool) (err error) {
  785. d := e.data
  786. p := e.parity
  787. t := d + p
  788. listBuf := make([]int, t+p)
  789. has := listBuf[:d]
  790. dLost := listBuf[d:t]
  791. pLost := listBuf[t : t+p]
  792. hasCnt, dCnt, pCnt := 0, 0, 0
  793. for i := 0; i < t; i++ {
  794. if vects[i] != nil {
  795. if hasCnt < d {
  796. has[hasCnt] = i
  797. hasCnt++
  798. }
  799. } else {
  800. if i < d {
  801. if dCnt < p {
  802. dLost[dCnt] = i
  803. dCnt++
  804. } else {
  805. return errors.New("rs.Reconst: not enough vects")
  806. }
  807. } else {
  808. if pCnt < p {
  809. pLost[pCnt] = i
  810. pCnt++
  811. } else {
  812. return errors.New("rs.Reconst: not enough vects")
  813. }
  814. }
  815. }
  816. }
  817. if hasCnt != d {
  818. return errors.New("rs.Reconst: not enough vects")
  819. }
  820. dLost = dLost[:dCnt]
  821. pLost = pLost[:pCnt]
  822. return e.reconst(vects, has, dLost, pLost, dataOnly)
  823. }