// Tools to import an avatar key & name database in CSV format into a KV database.
package main

import (
	"compress/bzip2"
	"compress/gzip"
	"encoding/csv"
	"encoding/json"
	"io"
	"os"
	"runtime"
	"time"

	"github.com/dgraph-io/badger/v3"
	"github.com/h2non/filetype"
	"github.com/h2non/filetype/matchers"
	"github.com/syndtr/goleveldb/leveldb"
	"github.com/syndtr/goleveldb/leveldb/util"
	"github.com/tidwall/buntdb"
)
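
// Note: log, checkErr, checkErrPanic, goslConfig, avatarUUID and the Badger options
// variable Opt are not declared in this file; they are assumed to be package-level
// identifiers defined elsewhere in this package.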

// importDatabase reads a (possibly compressed) CSV file of UUID,AvatarName pairs,
// as downloaded from http://w-hat.com/#name2key, and imports it into the configured KV database.
//
// One could theoretically set a cron job to fetch this file periodically and keep the database up to date.
// See https://stackoverflow.com/questions/24673335/how-do-i-read-a-gzipped-csv-file for the actual usage of these complicated things!
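// Each record is expected to be laid out as "avatar key,avatar name", e.g.
// (hypothetical values):
//
//	00000000-0000-0000-0000-000000000000,Example Resident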
func importDatabase(filename string) {
	filehandler, err := os.Open(filename)
	if err != nil {
		log.Fatal(err)
	}
	defer filehandler.Close()
	// First, check if we _do_ have a gzipped file or not...
	// We'll use a small library for that (gwyneth 20211027)
	// We only have to pass the file header = first 261 bytes
	head := make([]byte, 261)
	_, err = filehandler.Read(head)
	checkErr(err)
	kind, err := filetype.Match(head)
	checkErr(err)
	// Now rewind the file to the start. (gwyneth 20211028)
	position, err := filehandler.Seek(0, io.SeekStart)
	if position != 0 || err != nil {
		log.Error("could not rewind the file to the start position")
	}
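	// Reading the 261-byte header above advanced the file position, so without the
	// Seek back to offset 0 the CSV reader would start in the middle of the stream
	// (and, for compressed files, the decompressor would not even see its magic bytes).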
	var cr *csv.Reader // CSV reader needs to be declared here because of scope issues. (gwyneth 20211027)
	// Technically, we could match against a lot of archive types and get an io.Reader for each.
	// However, W-Hat has a limited selection of archives available (currently gzip and bzip2),
	// so we limit ourselves to these two, falling back to plaintext (gwyneth 20211027).
	switch kind {
	case matchers.TypeBz2:
		gr := bzip2.NewReader(filehandler) // open bzip2 reader
		cr = csv.NewReader(gr) // open csv reader and feed the bzip2 reader into it
	case matchers.TypeGz:
		zr, err := gzip.NewReader(filehandler) // open gzip reader
		checkErr(err)
		cr = csv.NewReader(zr) // open csv reader and feed the gzip reader into it
	default:
		// We just assume that it's an uncompressed CSV file and open it directly.
		cr = csv.NewReader(filehandler)
	}
	limit := 0 // outside the for loop, so that we can count how many entries we had in total
	time_start := time.Now() // we want to get an idea of how long this takes
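	// All three backends below follow the same pattern: read one CSV record at a time,
	// marshal it into the package's avatarUUID JSON representation, store it twice
	// (once keyed by avatar name, once keyed by avatar UUID, so lookups work in both
	// directions), and commit/write in batches of BATCH_BLOCK records to keep memory
	// usage bounded.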
	switch goslConfig.database {
	case "badger":
		// prepare connection to KV database
		kv, err := badger.Open(Opt)
		checkErrPanic(err) // should probably panic
		defer kv.Close()
		txn := kv.NewTransaction(true) // start new transaction; we will commit only every BATCH_BLOCK entries
		defer txn.Discard()
		for ; ; limit++ {
			record, err := cr.Read()
			if err == io.EOF {
				break
			} else if err != nil {
				log.Fatal(err)
			}
			// CSV: first entry is the avatar key UUID, second entry is the avatar name.
			// We probably should check for valid UUIDs; we may do that at some point. (gwyneth 20211031)
			jsonNewEntry, err := json.Marshal(avatarUUID{record[1], record[0], "Production"}) // W-Hat keys all come from the main LL grid, known as 'Production'
			if err != nil {
				log.Warning(err)
			} else {
				log.Debugf("Entry %04d - Name: %s UUID: %s - JSON: %s\n", limit, record[1], record[0], jsonNewEntry)
				// Place this record under the avatar's name
				if err = txn.Set([]byte(record[1]), jsonNewEntry); err != nil {
					log.Fatal(err)
				}
				// Now place it again, this time under the avatar's key
				if err = txn.Set([]byte(record[0]), jsonNewEntry); err != nil {
					log.Fatal(err)
				}
			}
			if limit%goslConfig.BATCH_BLOCK == 0 && limit != 0 { // we do not run on the first time, and then only every BATCH_BLOCK times
				log.Info("processing:", limit)
				if err = txn.Commit(); err != nil {
					log.Fatal(err)
				}
				runtime.GC()
				txn = kv.NewTransaction(true) // start a new transaction
				defer txn.Discard() // Badger treats Discard after a successful Commit as a no-op, so these deferred calls are just a safety net
			}
		}
		// commit the last (possibly partial) batch
		if err = txn.Commit(); err != nil {
			log.Fatal(err)
		}
	case "buntdb":
		db, err := buntdb.Open(goslConfig.dbNamePath)
		checkErrPanic(err)
		defer db.Close()
		txn, err := db.Begin(true)
		checkErrPanic(err)
		//defer txn.Commit()
		// very similar to the Badger code...
		for ; ; limit++ {
			record, err := cr.Read()
			if err == io.EOF {
				break
			} else if err != nil {
				log.Fatal(err)
			}
			jsonNewEntry, err := json.Marshal(avatarUUID{record[1], record[0], "Production"})
			if err != nil {
				log.Warning(err)
			} else {
				// see the comments above for Badger. (gwyneth 20211031)
				_, _, err = txn.Set(record[1], string(jsonNewEntry), nil)
				if err != nil {
					log.Fatal(err)
				}
				_, _, err = txn.Set(record[0], string(jsonNewEntry), nil)
				if err != nil {
					log.Fatal(err)
				}
			}
			if limit%goslConfig.BATCH_BLOCK == 0 && limit != 0 { // we do not run on the first time, and then only every BATCH_BLOCK times
				log.Info("processing:", limit)
				if err = txn.Commit(); err != nil {
					log.Fatal(err)
				}
				runtime.GC()
				txn, err = db.Begin(true) // start a new transaction
				checkErrPanic(err)
				//defer txn.Commit()
			}
		}
		// commit the last (possibly partial) batch
		if err = txn.Commit(); err != nil {
			log.Fatal(err)
		}
		db.Shrink() // compact the on-disk BuntDB file by removing redundant log entries
	case "leveldb":
		db, err := leveldb.OpenFile(goslConfig.dbNamePath, nil)
		checkErrPanic(err)
		defer db.Close()
		batch := new(leveldb.Batch)
		for ; ; limit++ {
			record, err := cr.Read()
			if err == io.EOF {
				break
			} else if err != nil {
				log.Fatal(err)
			}
			jsonNewEntry, err := json.Marshal(avatarUUID{record[1], record[0], "Production"})
			if err != nil {
				log.Warning(err)
			} else {
				// see the comments above for Badger. (gwyneth 20211031)
				batch.Put([]byte(record[1]), jsonNewEntry)
				batch.Put([]byte(record[0]), jsonNewEntry)
			}
			if limit%goslConfig.BATCH_BLOCK == 0 && limit != 0 {
				log.Info("processing:", limit)
				if err = db.Write(batch, nil); err != nil {
					log.Fatal(err)
				}
				batch.Reset() // unlike the others, we don't need to create a new batch every time
				runtime.GC() // it never hurts...
			}
		}
		// write the last (possibly partial) batch
		if err = db.Write(batch, nil); err != nil {
			log.Fatal(err)
		}
		batch.Reset() // reset it and let the garbage collector run
		runtime.GC()
		db.CompactRange(util.Range{Start: nil, Limit: nil}) // nil Start and Limit means: compact the entire key range
	}
	log.Info("total read", limit, "records (or thereabouts) in", time.Since(time_start))
}
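
// A minimal usage sketch (hypothetical filename; assumes goslConfig, the logger,
// and the Badger options in Opt have already been set up by the rest of the package):
//
//	importDatabase("name2key.csv.bz2")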