// Tool to import an avatar key & name database in CSV format into a KV database.
package main

import (
	"compress/bzip2"
	"compress/gzip"
	"encoding/csv"
	"encoding/json"
	"io"
	"os"
	"runtime"
	"time"

	"github.com/dgraph-io/badger/v3"
	"github.com/h2non/filetype"
	"github.com/h2non/filetype/matchers"
	"github.com/syndtr/goleveldb/leveldb"
	"github.com/syndtr/goleveldb/leveldb/util"
	"github.com/tidwall/buntdb"
)
// importDatabase reads a bzip2'ed CSV file of UUID,AvatarName pairs, downloaded from http://w-hat.com/#name2key .
//
// One could theoretically set a cron job to fetch this file and save it to disk periodically, keeping the database up to date.
// See https://stackoverflow.com/questions/24673335/how-do-i-read-a-gzipped-csv-file for the actual usage of these complicated things!
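// Each row is expected to hold the avatar key first and the name second, e.g.
// (a purely hypothetical entry):
//
//	f1a2b3c4-0000-0000-0000-000000000000,Example Resident
//
// so that record[0] is the avatar key (UUID) and record[1] is the avatar name.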
func importDatabase(filename string) {
	filehandler, err := os.Open(filename)
	if err != nil {
		log.Fatal(err)
	}
	defer filehandler.Close()
	// First, check if we _do_ have a gzipped file or not...
	// We'll use a small library for that (gwyneth 20211027).
	// We only need to pass the file header, i.e. the first 261 bytes.
	head := make([]byte, 261)
	_, err = filehandler.Read(head)
	checkErr(err)
	kind, err := filetype.Match(head)
	checkErr(err)
	// Now rewind the file to the start. (gwyneth 20211028)
	position, err := filehandler.Seek(0, io.SeekStart)
	if position != 0 || err != nil {
		log.Error("could not rewind the file to the start position")
	}
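	// The 261 bytes we just sniffed are part of the data, so the decompressor
	// must see them too; hence the rewind before chaining any readers.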
	var cr *csv.Reader // the CSV reader needs to be declared here because of scope issues. (gwyneth 20211027)
	// Technically, we could match a lot of archive formats and get an io.Reader for each.
	// However, W-Hat has a limited selection of archives available (currently gzip and bzip2),
	// so we limit ourselves to these two, falling back to plaintext (gwyneth 20211027).
	switch kind {
	case matchers.TypeBz2:
		gr := bzip2.NewReader(filehandler) // open bzip2 reader
		cr = csv.NewReader(gr)             // open csv reader and feed the bzip2 reader into it
	case matchers.TypeGz:
		zr, err := gzip.NewReader(filehandler) // open gzip reader
		checkErr(err)
		cr = csv.NewReader(zr) // open csv reader and feed the gzip reader into it
	default:
		// We just assume that it's an uncompressed CSV file and open it directly.
		cr = csv.NewReader(filehandler)
	}
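	// From here on, cr streams rows through whatever reader chain was picked
	// above, so memory usage stays flat no matter how large the archive is.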
	limit := 0                            // declared outside the for loop so that we can count how many entries we had in total
	BATCH_BLOCK := goslConfig.BATCH_BLOCK // saving a few struct field lookups...
	loopBatch := goslConfig.loopBatch     // define statically up here.
	timeStart := time.Now()               // we want to get an idea of how long this takes
	switch goslConfig.database {
	case "badger":
		// prepare the connection to the KV database
		kv, err := badger.Open(Opt)
		checkErrPanic(err) // if we can't open the database, there is nothing sensible left to do
		defer kv.Close()
		txn := kv.NewTransaction(true) // start a new read-write transaction; we will commit only every BATCH_BLOCK entries
		defer txn.Discard()
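		// Batching matters here: Badger caps the size of a single transaction
		// (writes beyond the cap fail with ErrTxnTooBig), and an open transaction
		// keeps all of its pending writes in memory, so we commit every
		// BATCH_BLOCK entries instead of using one huge transaction.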
		for ; ; limit++ {
			record, err := cr.Read()
			if err == io.EOF {
				break
			} else if err != nil {
				log.Fatal(err)
			}
			// CSV: the first entry is the avatar key (UUID), the second entry is the avatar name.
			// We probably should check for valid UUIDs; we may do that at some point. (gwyneth 20211031)
			jsonNewEntry, err := json.Marshal(avatarUUID{record[1], record[0], "Production"}) // W-Hat keys all come from the main LL grid, known as 'Production'
			if err != nil {
				log.Warning(err)
			} else {
				if limit%loopBatch == 0 {
					log.Debugf("Entry %04d - Name: %s UUID: %s - JSON: %s\n", limit, record[1], record[0], jsonNewEntry)
				}
				// Place this record under the avatar's name...
				if err = txn.Set([]byte(record[1]), jsonNewEntry); err != nil {
					log.Fatal(err)
				}
				// ...and again under the avatar's key, so lookups work in both directions.
				if err = txn.Set([]byte(record[0]), jsonNewEntry); err != nil {
					log.Fatal(err)
				}
			}
			if limit%BATCH_BLOCK == 0 && limit != 0 { // skip the very first iteration, then commit every BATCH_BLOCK entries
				log.Info("processing:", limit)
				if err = txn.Commit(); err != nil {
					log.Fatal(err)
				}
				runtime.GC()
				// Start a new transaction. No deferred Discard here: defers inside a
				// loop only run when the function returns, so they would pile up, and
				// Discard after a successful Commit is a no-op anyway.
				txn = kv.NewTransaction(true)
			}
		}
		// commit last batch
		if err = txn.Commit(); err != nil {
			log.Fatal(err)
		}
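	// The remaining backends follow the same read-marshal-store pattern as Badger;
	// only the batching primitive changes (buntdb transactions, leveldb batches).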
- case "buntdb":
- db, err := buntdb.Open(goslConfig.dbNamePath)
- checkErrPanic(err)
- defer db.Close()
- txn, err := db.Begin(true)
- checkErrPanic(err)
- //defer txn.Commit()
- // very similar to Badger code...
- for ; ; limit++ {
- record, err := cr.Read()
- if err == io.EOF {
- break
- } else if err != nil {
- log.Fatal(err)
- }
- jsonNewEntry, err := json.Marshal(avatarUUID{record[1], record[0], "Production"})
- if err != nil {
- log.Warning(err)
- } else {
- // see comments above for Badger. (gwyneth 20211031)
- _, _, err = txn.Set(record[1], string(jsonNewEntry), nil)
- if err != nil {
- log.Fatal(err)
- }
- _, _, err = txn.Set(record[0], string(jsonNewEntry), nil)
- if err != nil {
- log.Fatal(err)
- }
- }
- if limit % loopBatch == 0 {
- log.Debugf("Entry %04d - Name: %s UUID: %s - JSON: %s\n", limit, record[1], record[0], jsonNewEntry)
- }
- if limit % BATCH_BLOCK == 0 && limit != 0 { // we do not run on the first time, and then only every BATCH_BLOCK times
- log.Info("processing:", limit)
- if err = txn.Commit(); err != nil {
- log.Fatal(err)
- }
- runtime.GC()
- txn, err = db.Begin(true) // start a new transaction
- checkErrPanic(err)
- //defer txn.Commit()
- }
- }
- // commit last batch
- if err = txn.Commit(); err != nil {
- log.Fatal(err)
- }
- db.Shrink()
- case "leveldb":
- db, err := leveldb.OpenFile(goslConfig.dbNamePath, nil)
- checkErrPanic(err)
- defer db.Close()
- batch := new(leveldb.Batch)
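		// A leveldb.Batch is simply an in-memory buffer of pending writes that
		// db.Write applies atomically; Reset empties it for reuse, so unlike the
		// transaction-based backends above there is nothing to reopen per batch.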
		for ; ; limit++ {
			record, err := cr.Read()
			if err == io.EOF {
				break
			} else if err != nil {
				log.Fatal(err)
			}
			jsonNewEntry, err := json.Marshal(avatarUUID{record[1], record[0], "Production"})
			if err != nil {
				log.Warning(err)
			} else {
				// see the comments above for Badger. (gwyneth 20211031)
				batch.Put([]byte(record[1]), jsonNewEntry)
				batch.Put([]byte(record[0]), jsonNewEntry)
			}
			if limit%loopBatch == 0 {
				log.Debugf("Entry %04d - Name: %s UUID: %s - JSON: %s\n", limit, record[1], record[0], jsonNewEntry)
			}
			if limit%BATCH_BLOCK == 0 && limit != 0 {
				log.Info("processing:", limit)
				if err = db.Write(batch, nil); err != nil {
					log.Fatal(err)
				}
				batch.Reset() // unlike the others, we don't need to create a new batch every time
				runtime.GC()  // it never hurts...
			}
		}
		// write the last batch
		if err = db.Write(batch, nil); err != nil {
			log.Fatal(err)
		}
		batch.Reset() // reset it and let the garbage collector run
		runtime.GC()
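		// Compacting the whole key range after a bulk import flattens the LSM tree
		// and reclaims space from overwritten entries; a Range with nil Start and
		// Limit means "everything".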
		if err = db.CompactRange(util.Range{Start: nil, Limit: nil}); err != nil {
			log.Warning(err)
		}
	}
	log.Info("read a total of", limit, "records (or thereabouts) in", time.Since(timeStart))
}
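// As noted above, a cron job could keep the source file fresh; a purely
// illustrative example (the URL and path are hypothetical):
//
//	@weekly curl -s http://w-hat.com/name2key.csv.bz2 -o /path/to/name2key.csv.bz2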
|