cpuid.go 25 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987
  1. // Generated, DO NOT EDIT,
  2. // but copy it to your own project and rename the package.
  3. // See more at http://github.com/klauspost/cpuid
  4. package cpuid
  5. import (
  6. "strings"
  7. )
  // vendor identifies who a processor (or hypervisor) claims to be made by.
  type vendor int

  // Recognised vendors. other is the zero value.
  const (
  	other vendor = iota
  	intel
  	amd
  	via
  	transmeta
  	nsc
  	kvm    // Kernel-based Virtual Machine
  	msvm   // Microsoft Hyper-V or Windows Virtual PC
  	vmware // VMware
  	xenhvm // Xen HVM
  )
  // Feature bits. Each constant is a distinct single bit (1 << iota) and the
  // constants are deliberately left untyped so they can be combined with both
  // flags and plain uint64 values.
  const (
  	cmov        = 1 << iota // CMOV (i686 conditional move)
  	nx                      // No-Execute bit
  	amd3dnow                // AMD 3DNow!
  	amd3dnowext             // AMD 3DNow! extensions
  	mmx                     // MMX
  	mmxext                  // SSE integer functions or AMD MMX extensions
  	sse                     // SSE
  	sse2                    // SSE2 (Pentium 4)
  	sse3                    // SSE3 (Prescott)
  	ssse3                   // SSSE3 (Conroe)
  	sse4                    // SSE4.1 (Penryn)
  	sse4a                   // SSE4a (AMD Barcelona)
  	sse42                   // SSE4.2 (Nehalem)
  	avx                     // AVX
  	avx2                    // AVX2
  	fma3                    // Intel FMA3
  	fma4                    // FMA4 (Bulldozer)
  	xop                     // XOP (Bulldozer)
  	f16c                    // Half-precision floating-point conversion
  	bmi1                    // Bit Manipulation Instruction Set 1
  	bmi2                    // Bit Manipulation Instruction Set 2
  	tbm                     // AMD Trailing Bit Manipulation
  	lzcnt                   // LZCNT instruction
  	popcnt                  // POPCNT instruction
  	aesni                   // Advanced Encryption Standard New Instructions
  	clmul                   // Carry-less multiplication
  	htt                     // Hyperthreading (enabled)
  	hle                     // Hardware Lock Elision
  	rtm                     // Restricted Transactional Memory
  	rdrand                  // RDRAND instruction
  	rdseed                  // RDSEED instruction
  	adx                     // Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
  	sha                     // Intel SHA extensions
  	avx512f                 // AVX-512 Foundation
  	avx512dq                // AVX-512 Doubleword and Quadword instructions
  	avx512ifma              // AVX-512 Integer Fused Multiply-Add instructions
  	avx512pf                // AVX-512 Prefetch instructions
  	avx512er                // AVX-512 Exponential and Reciprocal instructions
  	avx512cd                // AVX-512 Conflict Detection instructions
  	avx512bw                // AVX-512 Byte and Word instructions
  	avx512vl                // AVX-512 Vector Length extensions
  	avx512vbmi              // AVX-512 Vector Bit Manipulation instructions
  	mpx                     // Intel MPX (Memory Protection Extensions)
  	erms                    // Enhanced REP MOVSB/STOSB
  	rdtscp                  // RDTSCP instruction
  	cx16                    // CMPXCHG16B instruction

  	// Performance indicators rather than instruction sets.
  	sse2slow // SSE2 is supported, but usually not faster
  	sse3slow // SSE3 is supported, but usually not faster
  	atom     // Atom processor, some SSSE3 instructions are slower
  )
  74. var flagNames = map[flags]string{
  75. cmov: "CMOV", // i686 CMOV
  76. nx: "NX", // NX (No-Execute) bit
  77. amd3dnow: "AMD3DNOW", // AMD 3DNOW
  78. amd3dnowext: "AMD3DNOWEXT", // AMD 3DNowExt
  79. mmx: "MMX", // Standard MMX
  80. mmxext: "MMXEXT", // SSE integer functions or AMD MMX ext
  81. sse: "SSE", // SSE functions
  82. sse2: "SSE2", // P4 SSE2 functions
  83. sse3: "SSE3", // Prescott SSE3 functions
  84. ssse3: "SSSE3", // Conroe SSSE3 functions
  85. sse4: "SSE4.1", // Penryn SSE4.1 functions
  86. sse4a: "SSE4A", // AMD Barcelona microarchitecture SSE4a instructions
  87. sse42: "SSE4.2", // Nehalem SSE4.2 functions
  88. avx: "AVX", // AVX functions
  89. avx2: "AVX2", // AVX functions
  90. fma3: "FMA3", // Intel FMA 3
  91. fma4: "FMA4", // Bulldozer FMA4 functions
  92. xop: "XOP", // Bulldozer XOP functions
  93. f16c: "F16C", // Half-precision floating-point conversion
  94. bmi1: "BMI1", // Bit Manipulation Instruction Set 1
  95. bmi2: "BMI2", // Bit Manipulation Instruction Set 2
  96. tbm: "TBM", // AMD Trailing Bit Manipulation
  97. lzcnt: "LZCNT", // LZCNT instruction
  98. popcnt: "POPCNT", // POPCNT instruction
  99. aesni: "AESNI", // Advanced Encryption Standard New Instructions
  100. clmul: "CLMUL", // Carry-less Multiplication
  101. htt: "HTT", // Hyperthreading (enabled)
  102. hle: "HLE", // Hardware Lock Elision
  103. rtm: "RTM", // Restricted Transactional Memory
  104. rdrand: "RDRAND", // RDRAND instruction is available
  105. rdseed: "RDSEED", // RDSEED instruction is available
  106. adx: "ADX", // Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
  107. sha: "SHA", // Intel SHA Extensions
  108. avx512f: "AVX512F", // AVX-512 Foundation
  109. avx512dq: "AVX512DQ", // AVX-512 Doubleword and Quadword Instructions
  110. avx512ifma: "AVX512IFMA", // AVX-512 Integer Fused Multiply-Add Instructions
  111. avx512pf: "AVX512PF", // AVX-512 Prefetch Instructions
  112. avx512er: "AVX512ER", // AVX-512 Exponential and Reciprocal Instructions
  113. avx512cd: "AVX512CD", // AVX-512 Conflict Detection Instructions
  114. avx512bw: "AVX512BW", // AVX-512 Byte and Word Instructions
  115. avx512vl: "AVX512VL", // AVX-512 Vector Length Extensions
  116. avx512vbmi: "AVX512VBMI", // AVX-512 Vector Bit Manipulation Instructions
  117. mpx: "MPX", // Intel MPX (Memory Protection Extensions)
  118. erms: "ERMS", // Enhanced REP MOVSB/STOSB
  119. rdtscp: "RDTSCP", // RDTSCP Instruction
  120. cx16: "CX16", // CMPXCHG16B Instruction
  121. // Performance indicators
  122. sse2slow: "SSE2SLOW", // SSE2 supported, but usually not faster
  123. sse3slow: "SSE3SLOW", // SSE3 supported, but usually not faster
  124. atom: "ATOM", // Atom processor, some SSSE3 instructions are slower
  125. }
  126. // CPUInfo contains information about the detected system CPU.
  127. type cpuInfo struct {
  128. brandname string // Brand name reported by the CPU
  129. vendorid vendor // Comparable CPU vendor ID
  130. features flags // Features of the CPU
  131. physicalcores int // Number of physical processor cores in your CPU. Will be 0 if undetectable.
  132. threadspercore int // Number of threads per physical core. Will be 1 if undetectable.
  133. logicalcores int // Number of physical cores times threads that can run on each core through the use of hyperthreading. Will be 0 if undetectable.
  134. family int // CPU family number
  135. model int // CPU model number
  136. cacheline int // Cache line size in bytes. Will be 0 if undetectable.
  137. cache struct {
  138. l1i int // L1 Instruction Cache (per core or shared). Will be -1 if undetected
  139. l1d int // L1 Data Cache (per core or shared). Will be -1 if undetected
  140. l2 int // L2 Cache (per core or shared). Will be -1 if undetected
  141. l3 int // L3 Instruction Cache (per core or shared). Will be -1 if undetected
  142. }
  143. maxFunc uint32
  144. maxExFunc uint32
  145. }
  // Low-level instruction hooks. They are declared as function variables and
  // have no implementation in this file; they must be assigned before detect
  // runs (init calls initCPU first — presumably that is where they are wired
  // up; verify against the platform-specific files).
  var (
  	// cpuid executes CPUID for leaf op.
  	cpuid func(op uint32) (eax, ebx, ecx, edx uint32)
  	// cpuidex executes CPUID for leaf op and sub-leaf op2.
  	cpuidex func(op, op2 uint32) (eax, ebx, ecx, edx uint32)
  	// xgetbv reads the extended control register selected by index.
  	xgetbv func(index uint32) (eax, edx uint32)
  	// rdtscpAsm executes RDTSCP and returns the raw register values.
  	rdtscpAsm func() (eax, ebx, ecx, edx uint32)
  )
  150. // CPU contains information about the CPU as detected on startup,
  151. // or when Detect last was called.
  152. //
  153. // Use this as the primary entry point to you data,
  154. // this way queries are
  155. var cpu cpuInfo
  156. func init() {
  157. initCPU()
  158. detect()
  159. }
  160. // Detect will re-detect current CPU info.
  161. // This will replace the content of the exported CPU variable.
  162. //
  163. // Unless you expect the CPU to change while you are running your program
  164. // you should not need to call this function.
  165. // If you call this, you must ensure that no other goroutine is accessing the
  166. // exported CPU variable.
  167. func detect() {
  168. cpu.maxFunc = maxFunctionID()
  169. cpu.maxExFunc = maxExtendedFunction()
  170. cpu.brandname = brandName()
  171. cpu.cacheline = cacheLine()
  172. cpu.family, cpu.model = familyModel()
  173. cpu.features = support()
  174. cpu.threadspercore = threadsPerCore()
  175. cpu.logicalcores = logicalCores()
  176. cpu.physicalcores = physicalCores()
  177. cpu.vendorid = vendorID()
  178. cpu.cacheSize()
  179. }
  180. // Generated here: http://play.golang.org/p/BxFH2Gdc0G
  181. // Cmov indicates support of CMOV instructions
  182. func (c cpuInfo) cmov() bool {
  183. return c.features&cmov != 0
  184. }
  185. // Amd3dnow indicates support of AMD 3DNOW! instructions
  186. func (c cpuInfo) amd3dnow() bool {
  187. return c.features&amd3dnow != 0
  188. }
  189. // Amd3dnowExt indicates support of AMD 3DNOW! Extended instructions
  190. func (c cpuInfo) amd3dnowext() bool {
  191. return c.features&amd3dnowext != 0
  192. }
  193. // MMX indicates support of MMX instructions
  194. func (c cpuInfo) mmx() bool {
  195. return c.features&mmx != 0
  196. }
  197. // MMXExt indicates support of MMXEXT instructions
  198. // (SSE integer functions or AMD MMX ext)
  199. func (c cpuInfo) mmxext() bool {
  200. return c.features&mmxext != 0
  201. }
  202. // SSE indicates support of SSE instructions
  203. func (c cpuInfo) sse() bool {
  204. return c.features&sse != 0
  205. }
  206. // SSE2 indicates support of SSE 2 instructions
  207. func (c cpuInfo) sse2() bool {
  208. return c.features&sse2 != 0
  209. }
  210. // SSE3 indicates support of SSE 3 instructions
  211. func (c cpuInfo) sse3() bool {
  212. return c.features&sse3 != 0
  213. }
  214. // SSSE3 indicates support of SSSE 3 instructions
  215. func (c cpuInfo) ssse3() bool {
  216. return c.features&ssse3 != 0
  217. }
  218. // SSE4 indicates support of SSE 4 (also called SSE 4.1) instructions
  219. func (c cpuInfo) sse4() bool {
  220. return c.features&sse4 != 0
  221. }
  222. // SSE42 indicates support of SSE4.2 instructions
  223. func (c cpuInfo) sse42() bool {
  224. return c.features&sse42 != 0
  225. }
  226. // AVX indicates support of AVX instructions
  227. // and operating system support of AVX instructions
  228. func (c cpuInfo) avx() bool {
  229. return c.features&avx != 0
  230. }
  231. // AVX2 indicates support of AVX2 instructions
  232. func (c cpuInfo) avx2() bool {
  233. return c.features&avx2 != 0
  234. }
  235. // FMA3 indicates support of FMA3 instructions
  236. func (c cpuInfo) fma3() bool {
  237. return c.features&fma3 != 0
  238. }
  239. // FMA4 indicates support of FMA4 instructions
  240. func (c cpuInfo) fma4() bool {
  241. return c.features&fma4 != 0
  242. }
  243. // XOP indicates support of XOP instructions
  244. func (c cpuInfo) xop() bool {
  245. return c.features&xop != 0
  246. }
  247. // F16C indicates support of F16C instructions
  248. func (c cpuInfo) f16c() bool {
  249. return c.features&f16c != 0
  250. }
  251. // BMI1 indicates support of BMI1 instructions
  252. func (c cpuInfo) bmi1() bool {
  253. return c.features&bmi1 != 0
  254. }
  255. // BMI2 indicates support of BMI2 instructions
  256. func (c cpuInfo) bmi2() bool {
  257. return c.features&bmi2 != 0
  258. }
  259. // TBM indicates support of TBM instructions
  260. // (AMD Trailing Bit Manipulation)
  261. func (c cpuInfo) tbm() bool {
  262. return c.features&tbm != 0
  263. }
  264. // Lzcnt indicates support of LZCNT instruction
  265. func (c cpuInfo) lzcnt() bool {
  266. return c.features&lzcnt != 0
  267. }
  268. // Popcnt indicates support of POPCNT instruction
  269. func (c cpuInfo) popcnt() bool {
  270. return c.features&popcnt != 0
  271. }
  272. // HTT indicates the processor has Hyperthreading enabled
  273. func (c cpuInfo) htt() bool {
  274. return c.features&htt != 0
  275. }
  276. // SSE2Slow indicates that SSE2 may be slow on this processor
  277. func (c cpuInfo) sse2slow() bool {
  278. return c.features&sse2slow != 0
  279. }
  280. // SSE3Slow indicates that SSE3 may be slow on this processor
  281. func (c cpuInfo) sse3slow() bool {
  282. return c.features&sse3slow != 0
  283. }
  284. // AesNi indicates support of AES-NI instructions
  285. // (Advanced Encryption Standard New Instructions)
  286. func (c cpuInfo) aesni() bool {
  287. return c.features&aesni != 0
  288. }
  289. // Clmul indicates support of CLMUL instructions
  290. // (Carry-less Multiplication)
  291. func (c cpuInfo) clmul() bool {
  292. return c.features&clmul != 0
  293. }
  294. // NX indicates support of NX (No-Execute) bit
  295. func (c cpuInfo) nx() bool {
  296. return c.features&nx != 0
  297. }
  298. // SSE4A indicates support of AMD Barcelona microarchitecture SSE4a instructions
  299. func (c cpuInfo) sse4a() bool {
  300. return c.features&sse4a != 0
  301. }
  302. // HLE indicates support of Hardware Lock Elision
  303. func (c cpuInfo) hle() bool {
  304. return c.features&hle != 0
  305. }
  306. // RTM indicates support of Restricted Transactional Memory
  307. func (c cpuInfo) rtm() bool {
  308. return c.features&rtm != 0
  309. }
  310. // Rdrand indicates support of RDRAND instruction is available
  311. func (c cpuInfo) rdrand() bool {
  312. return c.features&rdrand != 0
  313. }
  314. // Rdseed indicates support of RDSEED instruction is available
  315. func (c cpuInfo) rdseed() bool {
  316. return c.features&rdseed != 0
  317. }
  318. // ADX indicates support of Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
  319. func (c cpuInfo) adx() bool {
  320. return c.features&adx != 0
  321. }
  322. // SHA indicates support of Intel SHA Extensions
  323. func (c cpuInfo) sha() bool {
  324. return c.features&sha != 0
  325. }
  326. // AVX512F indicates support of AVX-512 Foundation
  327. func (c cpuInfo) avx512f() bool {
  328. return c.features&avx512f != 0
  329. }
  330. // AVX512DQ indicates support of AVX-512 Doubleword and Quadword Instructions
  331. func (c cpuInfo) avx512dq() bool {
  332. return c.features&avx512dq != 0
  333. }
  334. // AVX512IFMA indicates support of AVX-512 Integer Fused Multiply-Add Instructions
  335. func (c cpuInfo) avx512ifma() bool {
  336. return c.features&avx512ifma != 0
  337. }
  338. // AVX512PF indicates support of AVX-512 Prefetch Instructions
  339. func (c cpuInfo) avx512pf() bool {
  340. return c.features&avx512pf != 0
  341. }
  342. // AVX512ER indicates support of AVX-512 Exponential and Reciprocal Instructions
  343. func (c cpuInfo) avx512er() bool {
  344. return c.features&avx512er != 0
  345. }
  346. // AVX512CD indicates support of AVX-512 Conflict Detection Instructions
  347. func (c cpuInfo) avx512cd() bool {
  348. return c.features&avx512cd != 0
  349. }
  350. // AVX512BW indicates support of AVX-512 Byte and Word Instructions
  351. func (c cpuInfo) avx512bw() bool {
  352. return c.features&avx512bw != 0
  353. }
  354. // AVX512VL indicates support of AVX-512 Vector Length Extensions
  355. func (c cpuInfo) avx512vl() bool {
  356. return c.features&avx512vl != 0
  357. }
  358. // AVX512VBMI indicates support of AVX-512 Vector Bit Manipulation Instructions
  359. func (c cpuInfo) avx512vbmi() bool {
  360. return c.features&avx512vbmi != 0
  361. }
  362. // MPX indicates support of Intel MPX (Memory Protection Extensions)
  363. func (c cpuInfo) mpx() bool {
  364. return c.features&mpx != 0
  365. }
  366. // ERMS indicates support of Enhanced REP MOVSB/STOSB
  367. func (c cpuInfo) erms() bool {
  368. return c.features&erms != 0
  369. }
  370. func (c cpuInfo) rdtscp() bool {
  371. return c.features&rdtscp != 0
  372. }
  373. func (c cpuInfo) cx16() bool {
  374. return c.features&cx16 != 0
  375. }
  376. // Atom indicates an Atom processor
  377. func (c cpuInfo) atom() bool {
  378. return c.features&atom != 0
  379. }
  380. // Intel returns true if vendor is recognized as Intel
  381. func (c cpuInfo) intel() bool {
  382. return c.vendorid == intel
  383. }
  384. // AMD returns true if vendor is recognized as AMD
  385. func (c cpuInfo) amd() bool {
  386. return c.vendorid == amd
  387. }
  388. // Transmeta returns true if vendor is recognized as Transmeta
  389. func (c cpuInfo) transmeta() bool {
  390. return c.vendorid == transmeta
  391. }
  392. // NSC returns true if vendor is recognized as National Semiconductor
  393. func (c cpuInfo) nsc() bool {
  394. return c.vendorid == nsc
  395. }
  396. // VIA returns true if vendor is recognized as VIA
  397. func (c cpuInfo) via() bool {
  398. return c.vendorid == via
  399. }
  400. // RTCounter returns the 64-bit time-stamp counter
  401. // Uses the RDTSCP instruction. The value 0 is returned
  402. // if the CPU does not support the instruction.
  403. func (c cpuInfo) rtcounter() uint64 {
  404. if !c.rdtscp() {
  405. return 0
  406. }
  407. a, _, _, d := rdtscpAsm()
  408. return uint64(a) | (uint64(d) << 32)
  409. }
  410. // Ia32TscAux returns the IA32_TSC_AUX part of the RDTSCP.
  411. // This variable is OS dependent, but on Linux contains information
  412. // about the current cpu/core the code is running on.
  413. // If the RDTSCP instruction isn't supported on the CPU, the value 0 is returned.
  414. func (c cpuInfo) ia32tscaux() uint32 {
  415. if !c.rdtscp() {
  416. return 0
  417. }
  418. _, _, ecx, _ := rdtscpAsm()
  419. return ecx
  420. }
  421. // LogicalCPU will return the Logical CPU the code is currently executing on.
  422. // This is likely to change when the OS re-schedules the running thread
  423. // to another CPU.
  424. // If the current core cannot be detected, -1 will be returned.
  425. func (c cpuInfo) logicalcpu() int {
  426. if c.maxFunc < 1 {
  427. return -1
  428. }
  429. _, ebx, _, _ := cpuid(1)
  430. return int(ebx >> 24)
  431. }
  432. // VM Will return true if the cpu id indicates we are in
  433. // a virtual machine. This is only a hint, and will very likely
  434. // have many false negatives.
  435. func (c cpuInfo) vm() bool {
  436. switch c.vendorid {
  437. case msvm, kvm, vmware, xenhvm:
  438. return true
  439. }
  440. return false
  441. }
  // flags is a bit set of detected CPU features and characteristics; each
  // bit corresponds to one of the feature constants.
  type flags uint64
  444. // String returns a string representation of the detected
  445. // CPU features.
  446. func (f flags) String() string {
  447. return strings.Join(f.strings(), ",")
  448. }
  449. // Strings returns and array of the detected features.
  450. func (f flags) strings() []string {
  451. s := support()
  452. r := make([]string, 0, 20)
  453. for i := uint(0); i < 64; i++ {
  454. key := flags(1 << i)
  455. val := flagNames[key]
  456. if s&key != 0 {
  457. r = append(r, val)
  458. }
  459. }
  460. return r
  461. }
  462. func maxExtendedFunction() uint32 {
  463. eax, _, _, _ := cpuid(0x80000000)
  464. return eax
  465. }
  466. func maxFunctionID() uint32 {
  467. a, _, _, _ := cpuid(0)
  468. return a
  469. }
  470. func brandName() string {
  471. if maxExtendedFunction() >= 0x80000004 {
  472. v := make([]uint32, 0, 48)
  473. for i := uint32(0); i < 3; i++ {
  474. a, b, c, d := cpuid(0x80000002 + i)
  475. v = append(v, a, b, c, d)
  476. }
  477. return strings.Trim(string(valAsString(v...)), " ")
  478. }
  479. return "unknown"
  480. }
  481. func threadsPerCore() int {
  482. mfi := maxFunctionID()
  483. if mfi < 0x4 || vendorID() != intel {
  484. return 1
  485. }
  486. if mfi < 0xb {
  487. _, b, _, d := cpuid(1)
  488. if (d & (1 << 28)) != 0 {
  489. // v will contain logical core count
  490. v := (b >> 16) & 255
  491. if v > 1 {
  492. a4, _, _, _ := cpuid(4)
  493. // physical cores
  494. v2 := (a4 >> 26) + 1
  495. if v2 > 0 {
  496. return int(v) / int(v2)
  497. }
  498. }
  499. }
  500. return 1
  501. }
  502. _, b, _, _ := cpuidex(0xb, 0)
  503. if b&0xffff == 0 {
  504. return 1
  505. }
  506. return int(b & 0xffff)
  507. }
  508. func logicalCores() int {
  509. mfi := maxFunctionID()
  510. switch vendorID() {
  511. case intel:
  512. // Use this on old Intel processors
  513. if mfi < 0xb {
  514. if mfi < 1 {
  515. return 0
  516. }
  517. // CPUID.1:EBX[23:16] represents the maximum number of addressable IDs (initial APIC ID)
  518. // that can be assigned to logical processors in a physical package.
  519. // The value may not be the same as the number of logical processors that are present in the hardware of a physical package.
  520. _, ebx, _, _ := cpuid(1)
  521. logical := (ebx >> 16) & 0xff
  522. return int(logical)
  523. }
  524. _, b, _, _ := cpuidex(0xb, 1)
  525. return int(b & 0xffff)
  526. case amd:
  527. _, b, _, _ := cpuid(1)
  528. return int((b >> 16) & 0xff)
  529. default:
  530. return 0
  531. }
  532. }
  533. func familyModel() (int, int) {
  534. if maxFunctionID() < 0x1 {
  535. return 0, 0
  536. }
  537. eax, _, _, _ := cpuid(1)
  538. family := ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff)
  539. model := ((eax >> 4) & 0xf) + ((eax >> 12) & 0xf0)
  540. return int(family), int(model)
  541. }
  542. func physicalCores() int {
  543. switch vendorID() {
  544. case intel:
  545. return logicalCores() / threadsPerCore()
  546. case amd:
  547. if maxExtendedFunction() >= 0x80000008 {
  548. _, _, c, _ := cpuid(0x80000008)
  549. return int(c&0xff) + 1
  550. }
  551. }
  552. return 0
  553. }
  554. // Except from http://en.wikipedia.org/wiki/CPUID#EAX.3D0:_Get_vendor_ID
  555. var vendorMapping = map[string]vendor{
  556. "AMDisbetter!": amd,
  557. "AuthenticAMD": amd,
  558. "CentaurHauls": via,
  559. "GenuineIntel": intel,
  560. "TransmetaCPU": transmeta,
  561. "GenuineTMx86": transmeta,
  562. "Geode by NSC": nsc,
  563. "VIA VIA VIA ": via,
  564. "KVMKVMKVMKVM": kvm,
  565. "Microsoft Hv": msvm,
  566. "VMwareVMware": vmware,
  567. "XenVMMXenVMM": xenhvm,
  568. }
  569. func vendorID() vendor {
  570. _, b, c, d := cpuid(0)
  571. v := valAsString(b, d, c)
  572. vend, ok := vendorMapping[string(v)]
  573. if !ok {
  574. return other
  575. }
  576. return vend
  577. }
  578. func cacheLine() int {
  579. if maxFunctionID() < 0x1 {
  580. return 0
  581. }
  582. _, ebx, _, _ := cpuid(1)
  583. cache := (ebx & 0xff00) >> 5 // cflush size
  584. if cache == 0 && maxExtendedFunction() >= 0x80000006 {
  585. _, _, ecx, _ := cpuid(0x80000006)
  586. cache = ecx & 0xff // cacheline size
  587. }
  588. // TODO: Read from Cache and TLB Information
  589. return int(cache)
  590. }
  591. func (c *cpuInfo) cacheSize() {
  592. c.cache.l1d = -1
  593. c.cache.l1i = -1
  594. c.cache.l2 = -1
  595. c.cache.l3 = -1
  596. vendor := vendorID()
  597. switch vendor {
  598. case intel:
  599. if maxFunctionID() < 4 {
  600. return
  601. }
  602. for i := uint32(0); ; i++ {
  603. eax, ebx, ecx, _ := cpuidex(4, i)
  604. cacheType := eax & 15
  605. if cacheType == 0 {
  606. break
  607. }
  608. cacheLevel := (eax >> 5) & 7
  609. coherency := int(ebx&0xfff) + 1
  610. partitions := int((ebx>>12)&0x3ff) + 1
  611. associativity := int((ebx>>22)&0x3ff) + 1
  612. sets := int(ecx) + 1
  613. size := associativity * partitions * coherency * sets
  614. switch cacheLevel {
  615. case 1:
  616. if cacheType == 1 {
  617. // 1 = Data Cache
  618. c.cache.l1d = size
  619. } else if cacheType == 2 {
  620. // 2 = Instruction Cache
  621. c.cache.l1i = size
  622. } else {
  623. if c.cache.l1d < 0 {
  624. c.cache.l1i = size
  625. }
  626. if c.cache.l1i < 0 {
  627. c.cache.l1i = size
  628. }
  629. }
  630. case 2:
  631. c.cache.l2 = size
  632. case 3:
  633. c.cache.l3 = size
  634. }
  635. }
  636. case amd:
  637. // Untested.
  638. if maxExtendedFunction() < 0x80000005 {
  639. return
  640. }
  641. _, _, ecx, edx := cpuid(0x80000005)
  642. c.cache.l1d = int(((ecx >> 24) & 0xFF) * 1024)
  643. c.cache.l1i = int(((edx >> 24) & 0xFF) * 1024)
  644. if maxExtendedFunction() < 0x80000006 {
  645. return
  646. }
  647. _, _, ecx, _ = cpuid(0x80000006)
  648. c.cache.l2 = int(((ecx >> 16) & 0xFFFF) * 1024)
  649. }
  650. return
  651. }
  652. func support() flags {
  653. mfi := maxFunctionID()
  654. vend := vendorID()
  655. if mfi < 0x1 {
  656. return 0
  657. }
  658. rval := uint64(0)
  659. _, _, c, d := cpuid(1)
  660. if (d & (1 << 15)) != 0 {
  661. rval |= cmov
  662. }
  663. if (d & (1 << 23)) != 0 {
  664. rval |= mmx
  665. }
  666. if (d & (1 << 25)) != 0 {
  667. rval |= mmxext
  668. }
  669. if (d & (1 << 25)) != 0 {
  670. rval |= sse
  671. }
  672. if (d & (1 << 26)) != 0 {
  673. rval |= sse2
  674. }
  675. if (c & 1) != 0 {
  676. rval |= sse3
  677. }
  678. if (c & 0x00000200) != 0 {
  679. rval |= ssse3
  680. }
  681. if (c & 0x00080000) != 0 {
  682. rval |= sse4
  683. }
  684. if (c & 0x00100000) != 0 {
  685. rval |= sse42
  686. }
  687. if (c & (1 << 25)) != 0 {
  688. rval |= aesni
  689. }
  690. if (c & (1 << 1)) != 0 {
  691. rval |= clmul
  692. }
  693. if c&(1<<23) != 0 {
  694. rval |= popcnt
  695. }
  696. if c&(1<<30) != 0 {
  697. rval |= rdrand
  698. }
  699. if c&(1<<29) != 0 {
  700. rval |= f16c
  701. }
  702. if c&(1<<13) != 0 {
  703. rval |= cx16
  704. }
  705. if vend == intel && (d&(1<<28)) != 0 && mfi >= 4 {
  706. if threadsPerCore() > 1 {
  707. rval |= htt
  708. }
  709. }
  710. // Check XGETBV, OXSAVE and AVX bits
  711. if c&(1<<26) != 0 && c&(1<<27) != 0 && c&(1<<28) != 0 {
  712. // Check for OS support
  713. eax, _ := xgetbv(0)
  714. if (eax & 0x6) == 0x6 {
  715. rval |= avx
  716. if (c & 0x00001000) != 0 {
  717. rval |= fma3
  718. }
  719. }
  720. }
  721. // Check AVX2, AVX2 requires OS support, but BMI1/2 don't.
  722. if mfi >= 7 {
  723. _, ebx, ecx, _ := cpuidex(7, 0)
  724. if (rval&avx) != 0 && (ebx&0x00000020) != 0 {
  725. rval |= avx2
  726. }
  727. if (ebx & 0x00000008) != 0 {
  728. rval |= bmi1
  729. if (ebx & 0x00000100) != 0 {
  730. rval |= bmi2
  731. }
  732. }
  733. if ebx&(1<<4) != 0 {
  734. rval |= hle
  735. }
  736. if ebx&(1<<9) != 0 {
  737. rval |= erms
  738. }
  739. if ebx&(1<<11) != 0 {
  740. rval |= rtm
  741. }
  742. if ebx&(1<<14) != 0 {
  743. rval |= mpx
  744. }
  745. if ebx&(1<<18) != 0 {
  746. rval |= rdseed
  747. }
  748. if ebx&(1<<19) != 0 {
  749. rval |= adx
  750. }
  751. if ebx&(1<<29) != 0 {
  752. rval |= sha
  753. }
  754. // Only detect AVX-512 features if XGETBV is supported
  755. if c&((1<<26)|(1<<27)) == (1<<26)|(1<<27) {
  756. // Check for OS support
  757. eax, _ := xgetbv(0)
  758. // Verify that XCR0[7:5] = ‘111b’ (OPMASK state, upper 256-bit of ZMM0-ZMM15 and
  759. // ZMM16-ZMM31 state are enabled by OS)
  760. /// and that XCR0[2:1] = ‘11b’ (XMM state and YMM state are enabled by OS).
  761. if (eax>>5)&7 == 7 && (eax>>1)&3 == 3 {
  762. if ebx&(1<<16) != 0 {
  763. rval |= avx512f
  764. }
  765. if ebx&(1<<17) != 0 {
  766. rval |= avx512dq
  767. }
  768. if ebx&(1<<21) != 0 {
  769. rval |= avx512ifma
  770. }
  771. if ebx&(1<<26) != 0 {
  772. rval |= avx512pf
  773. }
  774. if ebx&(1<<27) != 0 {
  775. rval |= avx512er
  776. }
  777. if ebx&(1<<28) != 0 {
  778. rval |= avx512cd
  779. }
  780. if ebx&(1<<30) != 0 {
  781. rval |= avx512bw
  782. }
  783. if ebx&(1<<31) != 0 {
  784. rval |= avx512vl
  785. }
  786. // ecx
  787. if ecx&(1<<1) != 0 {
  788. rval |= avx512vbmi
  789. }
  790. }
  791. }
  792. }
  793. if maxExtendedFunction() >= 0x80000001 {
  794. _, _, c, d := cpuid(0x80000001)
  795. if (c & (1 << 5)) != 0 {
  796. rval |= lzcnt
  797. rval |= popcnt
  798. }
  799. if (d & (1 << 31)) != 0 {
  800. rval |= amd3dnow
  801. }
  802. if (d & (1 << 30)) != 0 {
  803. rval |= amd3dnowext
  804. }
  805. if (d & (1 << 23)) != 0 {
  806. rval |= mmx
  807. }
  808. if (d & (1 << 22)) != 0 {
  809. rval |= mmxext
  810. }
  811. if (c & (1 << 6)) != 0 {
  812. rval |= sse4a
  813. }
  814. if d&(1<<20) != 0 {
  815. rval |= nx
  816. }
  817. if d&(1<<27) != 0 {
  818. rval |= rdtscp
  819. }
  820. /* Allow for selectively disabling SSE2 functions on AMD processors
  821. with SSE2 support but not SSE4a. This includes Athlon64, some
  822. Opteron, and some Sempron processors. MMX, SSE, or 3DNow! are faster
  823. than SSE2 often enough to utilize this special-case flag.
  824. AV_CPU_FLAG_SSE2 and AV_CPU_FLAG_SSE2SLOW are both set in this case
  825. so that SSE2 is used unless explicitly disabled by checking
  826. AV_CPU_FLAG_SSE2SLOW. */
  827. if vendorID() != intel &&
  828. rval&sse2 != 0 && (c&0x00000040) == 0 {
  829. rval |= sse2slow
  830. }
  831. /* XOP and FMA4 use the AVX instruction coding scheme, so they can't be
  832. * used unless the OS has AVX support. */
  833. if (rval & avx) != 0 {
  834. if (c & 0x00000800) != 0 {
  835. rval |= xop
  836. }
  837. if (c & 0x00010000) != 0 {
  838. rval |= fma4
  839. }
  840. }
  841. if vendorID() == intel {
  842. family, model := familyModel()
  843. if family == 6 && (model == 9 || model == 13 || model == 14) {
  844. /* 6/9 (pentium-m "banias"), 6/13 (pentium-m "dothan"), and
  845. * 6/14 (core1 "yonah") theoretically support sse2, but it's
  846. * usually slower than mmx. */
  847. if (rval & sse2) != 0 {
  848. rval |= sse2slow
  849. }
  850. if (rval & sse3) != 0 {
  851. rval |= sse3slow
  852. }
  853. }
  854. /* The Atom processor has SSSE3 support, which is useful in many cases,
  855. * but sometimes the SSSE3 version is slower than the SSE2 equivalent
  856. * on the Atom, but is generally faster on other processors supporting
  857. * SSSE3. This flag allows for selectively disabling certain SSSE3
  858. * functions on the Atom. */
  859. if family == 6 && model == 28 {
  860. rval |= atom
  861. }
  862. }
  863. }
  864. return flags(rval)
  865. }
  // valAsString unpacks each 32-bit value into four bytes, least significant
  // byte first, and returns the concatenated bytes. The result is cut just
  // before the first zero byte, if there is one.
  func valAsString(values ...uint32) []byte {
  	out := make([]byte, 4*len(values))
  	for idx, word := range values {
  		base := idx * 4
  		cell := out[base : base+4]
  		cell[0], cell[1], cell[2], cell[3] = byte(word), byte(word>>8), byte(word>>16), byte(word>>24)
  		for k, b := range cell {
  			if b == 0 {
  				return out[:base+k]
  			}
  		}
  	}
  	return out
  }