import-database.go 6.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211
  1. // Tools to import an avatar key & name database in CSV format into a KV database.
  2. package main
  3. import (
  4. "compress/bzip2"
  5. "compress/gzip"
  6. "encoding/csv"
  7. "encoding/json"
  8. "io"
  9. "os"
  10. "runtime"
  11. "time"
  12. "github.com/dgraph-io/badger/v3"
  13. "github.com/h2non/filetype"
  14. "github.com/h2non/filetype/matchers"
  15. "github.com/syndtr/goleveldb/leveldb"
  16. "github.com/syndtr/goleveldb/leveldb/util"
  17. "github.com/tidwall/buntdb"
  18. )
  19. // importDatabase is essentially reading a bzip2'ed CSV file with UUID,AvatarName downloaded from http://w-hat.com/#name2key .
  20. //
  21. // One could theoretically set a cron job to get this file, save it on disk periodically, and keep the database up-to-date.
  22. // See https://stackoverflow.com/questions/24673335/how-do-i-read-a-gzipped-csv-file for the actual usage of these complicated things!
  23. func importDatabase(filename string) {
  24. filehandler, err := os.Open(filename)
  25. if err != nil {
  26. log.Fatal(err)
  27. }
  28. defer filehandler.Close()
  29. // First, check if we _do_ have a gzipped file or not...
  30. // We'll use a small library for that (gwyneth 20211027)
  31. // We only have to pass the file header = first 261 bytes
  32. head := make([]byte, 261)
  33. _, err = filehandler.Read(head)
  34. checkErr(err)
  35. kind, err := filetype.Match(head)
  36. checkErr(err)
  37. // Now rewind the file to the start. (gwyneth 20211028)
  38. position, err := filehandler.Seek(0, 0)
  39. if position != 0 || err != nil {
  40. log.Error("could not rewind the file to the start position")
  41. }
  42. var cr *csv.Reader // CSV reader needs to be declared here because of scope issues. (gwyneth 20211027)
  43. // Technically, we could match for a lot of archives and get a io.Reader for each.
  44. // However, W-Hat has a limited selection of archives available (currently gzip and bzip2)
  45. // so we limit ourselves to these two, falling back to plaintext (gwyneth 20211027).
  46. switch kind {
  47. case matchers.TypeBz2:
  48. gr := bzip2.NewReader(filehandler) // open bzip2 reader
  49. cr = csv.NewReader(gr) // open csv reader and feed the bzip2 reader into it
  50. case matchers.TypeGz:
  51. zr, err := gzip.NewReader(filehandler) // open gzip reader
  52. checkErr(err)
  53. cr = csv.NewReader(zr) // open csv reader and feed the bzip2 reader into it
  54. default:
  55. // We just assume that it's a CSV (uncompressed) file and open it.
  56. cr = csv.NewReader(filehandler)
  57. }
  58. limit := 0 // outside of for loop so that we can count how many entries we had in total
  59. BATCH_BLOCK := goslConfig.BATCH_BLOCK // saving a few array calls...
  60. loopBatch := goslConfig.loopBatch // define statically up here.
  61. time_start := time.Now() // we want to get an idea on how long this takes
  62. switch goslConfig.database {
  63. case "badger":
  64. // prepare connection to KV database
  65. kv, err := badger.Open(Opt)
  66. checkErrPanic(err) // should probably panic
  67. defer kv.Close()
  68. txn := kv.NewTransaction(true) // start new transaction; we will commit only every BATCH_BLOCK entries
  69. defer txn.Discard()
  70. for ; ; limit++ {
  71. record, err := cr.Read()
  72. if err == io.EOF {
  73. break
  74. } else if err != nil {
  75. log.Fatal(err)
  76. }
  77. // CSV: first entry is avatar key UUID, second entry is avatar name.
  78. // We probably should check for valid UUIDs; we may do that at some point. (gwyneth 20211031)
  79. jsonNewEntry, err := json.Marshal(avatarUUID{record[1], record[0], "Production"}) // W-Hat keys come all from the main LL grid, known as 'Production'
  80. if err != nil {
  81. log.Warning(err)
  82. } else {
  83. if limit % loopBatch == 0 {
  84. log.Debugf("Entry %04d - Name: %s UUID: %s - JSON: %s\n", limit, record[1], record[0], jsonNewEntry)
  85. }
  86. // Place this record under the avatar's name
  87. if err = txn.Set([]byte(record[1]), jsonNewEntry); err != nil {
  88. log.Fatal(err)
  89. }
  90. // Now place it again, this time under the avatar's key
  91. if err = txn.Set([]byte(record[0]), jsonNewEntry); err != nil {
  92. log.Fatal(err)
  93. }
  94. }
  95. if limit % BATCH_BLOCK == 0 && limit != 0 { // we do not run on the first time, and then only every BATCH_BLOCK times
  96. log.Info("processing:", limit)
  97. if err = txn.Commit(); err != nil {
  98. log.Fatal(err)
  99. }
  100. runtime.GC()
  101. txn = kv.NewTransaction(true) // start a new transaction
  102. defer txn.Discard()
  103. }
  104. }
  105. // commit last batch
  106. if err = txn.Commit(); err != nil {
  107. log.Fatal(err)
  108. }
  109. case "buntdb":
  110. db, err := buntdb.Open(goslConfig.dbNamePath)
  111. checkErrPanic(err)
  112. defer db.Close()
  113. txn, err := db.Begin(true)
  114. checkErrPanic(err)
  115. //defer txn.Commit()
  116. // very similar to Badger code...
  117. for ; ; limit++ {
  118. record, err := cr.Read()
  119. if err == io.EOF {
  120. break
  121. } else if err != nil {
  122. log.Fatal(err)
  123. }
  124. jsonNewEntry, err := json.Marshal(avatarUUID{record[1], record[0], "Production"})
  125. if err != nil {
  126. log.Warning(err)
  127. } else {
  128. // see comments above for Badger. (gwyneth 20211031)
  129. _, _, err = txn.Set(record[1], string(jsonNewEntry), nil)
  130. if err != nil {
  131. log.Fatal(err)
  132. }
  133. _, _, err = txn.Set(record[0], string(jsonNewEntry), nil)
  134. if err != nil {
  135. log.Fatal(err)
  136. }
  137. }
  138. if limit % loopBatch == 0 {
  139. log.Debugf("Entry %04d - Name: %s UUID: %s - JSON: %s\n", limit, record[1], record[0], jsonNewEntry)
  140. }
  141. if limit % BATCH_BLOCK == 0 && limit != 0 { // we do not run on the first time, and then only every BATCH_BLOCK times
  142. log.Info("processing:", limit)
  143. if err = txn.Commit(); err != nil {
  144. log.Fatal(err)
  145. }
  146. runtime.GC()
  147. txn, err = db.Begin(true) // start a new transaction
  148. checkErrPanic(err)
  149. //defer txn.Commit()
  150. }
  151. }
  152. // commit last batch
  153. if err = txn.Commit(); err != nil {
  154. log.Fatal(err)
  155. }
  156. db.Shrink()
  157. case "leveldb":
  158. db, err := leveldb.OpenFile(goslConfig.dbNamePath, nil)
  159. checkErrPanic(err)
  160. defer db.Close()
  161. batch := new(leveldb.Batch)
  162. for ; ; limit++ {
  163. record, err := cr.Read()
  164. if err == io.EOF {
  165. break
  166. } else if err != nil {
  167. log.Fatal(err)
  168. }
  169. jsonNewEntry, err := json.Marshal(avatarUUID{record[1], record[0], "Production"})
  170. if err != nil {
  171. log.Warning(err)
  172. } else {
  173. // see comments above for Badger. (gwyneth 20211031)
  174. batch.Put([]byte(record[1]), jsonNewEntry)
  175. batch.Put([]byte(record[0]), jsonNewEntry)
  176. }
  177. if limit % loopBatch == 0 {
  178. log.Debugf("Entry %04d - Name: %s UUID: %s - JSON: %s\n", limit, record[1], record[0], jsonNewEntry)
  179. }
  180. if limit % BATCH_BLOCK == 0 && limit != 0 {
  181. log.Info("processing:", limit)
  182. if err = db.Write(batch, nil); err != nil {
  183. log.Fatal(err)
  184. }
  185. batch.Reset() // unlike the others, we don't need to create a new batch every time
  186. runtime.GC() // it never hurts...
  187. }
  188. }
  189. // commit last batch
  190. if err = db.Write(batch, nil); err != nil {
  191. log.Fatal(err)
  192. }
  193. batch.Reset() // reset it and let the garbage collector run
  194. runtime.GC()
  195. db.CompactRange(util.Range{Start: nil, Limit: nil})
  196. }
  197. log.Info("total read", limit, "records (or thereabouts) in", time.Since(time_start))
  198. }