
Using goroutines with BatchSet to speed up the process of importing.

Gwyneth Llewelyn 7 years ago
parent
commit
446bed38fc
1 changed file with 60 additions and 13 deletions

+ 60 - 13
gosl.go
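
The idea behind this commit, in short: while reading the CSV dump, fill a fixed-size slice of badger.Entry, hand every full slice to a goroutine that writes it with KV.BatchSet, and start a fresh slice for the next block. Below is a minimal sketch of that pattern, not part of the commit itself. It reuses the old Badger KV API that gosl.go already relies on (badger.NewKV, KV.BatchSet, the per-entry Error field); the import path, badger.DefaultOptions, the sync.WaitGroup, the tiny block size, the scratch directory and the made-up keys and values are assumptions added for illustration only. The WaitGroup shows one way of waiting for all in-flight batches before the store is closed; the commit itself does not wait for them (see the BUG note in the diff below).

package main

import (
	"fmt"
	"log"
	"sync"

	"github.com/dgraph-io/badger" // old KV-era Badger API, as used by gosl.go (import path assumed)
)

// writeBatch pushes one block of entries with BatchSet and reports any per-entry errors.
func writeBatch(kv *badger.KV, batch []badger.Entry, wg *sync.WaitGroup) {
	defer wg.Done()
	entries := make([]*badger.Entry, len(batch)) // BatchSet takes a slice of pointers
	for i := range batch {
		entries[i] = &batch[i]
	}
	if err := kv.BatchSet(entries); err != nil {
		log.Println("BatchSet:", err)
	}
	for _, e := range entries {
		if e.Error != nil {
			log.Println("entry error:", e.Error)
		}
	}
}

func main() {
	opt := badger.DefaultOptions
	opt.Dir = "/tmp/badger-sketch" // hypothetical scratch directory
	opt.ValueDir = opt.Dir
	kv, err := badger.NewKV(&opt)
	if err != nil {
		log.Fatal(err)
	}
	defer kv.Close()

	const batchBlock = 4 // tiny block size, just for the example
	var wg sync.WaitGroup
	batch := make([]badger.Entry, 0, batchBlock)

	for i := 0; i < 10; i++ { // stand-in for the CSV reading loop in importDatabase()
		batch = append(batch, badger.Entry{
			Key:   []byte(fmt.Sprintf("avatar-%d", i)),
			Value: []byte(`{"UUID":"...","Grid":"Production"}`),
		})
		if len(batch) == batchBlock { // the block is full, hand it over to a goroutine
			wg.Add(1)
			go writeBatch(kv, batch, &wg)
			batch = make([]badger.Entry, 0, batchBlock) // fresh block; the goroutine keeps the old one
		}
	}
	if len(batch) > 0 { // flush whatever is left over
		wg.Add(1)
		go writeBatch(kv, batch, &wg)
	}
	wg.Wait() // make sure every batch has landed before the deferred kv.Close() runs
}

In the real importDatabase() below, the block size is BATCH_BLOCK and the entries come from the bzip2-compressed W-Hat CSV file.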

@@ -39,6 +39,8 @@ type avatarUUID struct {
 	Grid string
 } 
 
+var BATCH_BLOCK = 100000	// how many entries to write to the database as a block; the bigger, the faster, but the more memory it consumes
+
 /*
 				  .__			 
   _____ _____  |__| ____  
@@ -117,7 +119,8 @@ func main() {
 	Opt.Dir = *myDir
 	Opt.ValueDir = Opt.Dir
 	if *noMemory {
-		Opt.TableLoadingMode = options.FileIO // use standard file I/O operations for tables instead of LoadRAM 
+		Opt.TableLoadingMode = options.FileIO // use standard file I/O operations for tables instead of LoadRAM
+		BATCH_BLOCK = 10000	// import fewer entries at a time; it will take longer, but it should use far less memory
 		log.Info("Trying to avoid too much memory consumption")	
 	}
 	kv, err := badger.NewKV(&Opt)
@@ -252,6 +255,7 @@ func handler(w http.ResponseWriter, r *http.Request) {
 }
 // searchKVname searches the KV database for an avatar name.
 func searchKVname(avatarName string) (UUID string, grid string) {
+	time_start := time.Now()
 	kv, err := badger.NewKV(&Opt)
 	defer kv.Close()
 	var item badger.KVItem
@@ -266,19 +270,23 @@ func searchKVname(avatarName string) (UUID string, grid string) {
 		log.Errorf("Error while unmarshalling UUID for name: %s - %v\n", avatarName, err)
 		return NullUUID, ""
 	}
+	time_end := time.Now()
+	diffTime := time_end.Sub(time_start)
+	log.Debugf("Time to lookup '%s': %v\n", avatarName, diffTime)
 	return val.UUID, val.Grid
 }
 // searchKVUUID searches the KV database for an avatar key.
 func searchKVUUID(avatarKey string) (name string, grid string) {
+	time_start := time.Now()
 	kv, err := badger.NewKV(&Opt)
 	checkErr(err) // should probably panic
-
 	itOpt := badger.DefaultIteratorOptions
+	itOpt.PrefetchValues = true
+	itOpt.PrefetchSize = 1000	// attempt to make the iteration a little more efficient; our entries are small, so this does not cost much memory
 	itr := kv.NewIterator(itOpt)
 	var val = avatarUUID{ NullUUID, "" }
 	var found string
 	checks := 0
-	time_start := time.Now()
 	for itr.Rewind(); itr.Valid(); itr.Next() {
 		item := itr.Item()
 		if err = item.Value(func(v []byte) {
@@ -310,13 +318,14 @@ func importDatabase(filename string) {
 	defer f.Close()
 	gr := bzip2.NewReader(f) // open bzip2 reader
 	cr := csv.NewReader(gr)  // open csv reader and feed the bzip2 reader into it
-	limit := 0
 	kv, err := badger.NewKV(&Opt)
 	checkErrPanic(err) // should probably panic		
 	defer kv.Close()
 	time_start := time.Now()
-	// probably better to get several chunks and use BatchSet concurrently, as recommended per the Badger instructions
-	for {
+	var oneEntry badger.Entry	// declared outside the loop so that it always has memory allocated for it
+	limit := 0	// declared before the for loop so that we can count how many entries we read in total
+	var batch = make([]badger.Entry, BATCH_BLOCK) // this slice holds the actual data, or else we would only get pointers to nil
+	for ; ; limit++ {
 		record, err := cr.Read()
 		if err == io.EOF {
 			break
@@ -324,25 +333,63 @@ func importDatabase(filename string) {
 		if err != nil {
 			log.Fatal(err)
 		}
-		//fmt.Println("Key:", record[0], "Name:", record[1])			
 		jsonNewEntry, err := json.Marshal(avatarUUID{ record[0], "Production" }) // W-Hat keys all come from the main LL grid, known as 'Production'
 		if err != nil {
 			log.Warning(err)
 		} else {
-			kv.Set([]byte(record[1]), []byte(jsonNewEntry), 0x00)
+			oneEntry = badger.Entry{ Key: []byte(record[1]), Value: []byte(jsonNewEntry)}
+			batch[limit % BATCH_BLOCK] = oneEntry
 		}
-		limit++
-		if limit % 1000000 == 0 {
-			time_end := time.Now()
-			diffTime := time_end.Sub(time_start)
-			log.Info("Read", limit, "records (or thereabouts) in", diffTime)
+		if limit % BATCH_BLOCK == BATCH_BLOCK - 1 { // flush only once the batch is full, so no slot gets overwritten before it is written out
+			log.Debug("Processing:", limit)
+			go writeOneBatch(kv, batch)	// the goroutine keeps its own reference to this slice
+			runtime.GC()
+			batch = make([]badger.Entry, BATCH_BLOCK)	// start a fresh batch for the next block
 		}
 	}
+	if limit % BATCH_BLOCK != 0 {	// NOTE(gwyneth): flush the final entries, i.e. whatever is left after the last full block of BATCH_BLOCK;
+		writeOneBatch(kv, batch[:limit % BATCH_BLOCK])	// this one is not run as a goroutine, because the function might return and
+	}												// close our KV store before this last write finishes
+	// BUG(gwyneth): the goroutines launched above may still be running when this function returns and the deferred kv.Close()
+	// runs, so a racing condition is still possible; waiting for them (for instance with a sync.WaitGroup) would be safer
+	batch = nil // flag the garbage collector that we are finished with this slice
 	time_end := time.Now()
 	diffTime := time_end.Sub(time_start)
 	log.Info("Total read", limit, "records (or thereabouts) in", diffTime)
 }
 
+// writeOneBatch copies all entries in this batch into a slice of pointers, as BatchSet expects, and pushes them out as a single batch.
+// Since this is usually called as a goroutine, several of these batches may be in flight at the same time!
+func writeOneBatch(kv *badger.KV, batch []badger.Entry) {
+	if kv == nil {
+		log.Panic("kv should NEVER be nil")
+	}
+	if len(batch) == 0 {	// also catches a nil batch
+		log.Panic("batch should NEVER be nil or have zero elements")
+	}
+	time_start := time.Now()
+	var entries = make([]*badger.Entry, len(batch))	// prepare pointers to the entries for BatchSet; use len(batch), since the final batch may be smaller than BATCH_BLOCK
+	for i := 0; i < len(batch); i++ {
+		entries[i] = &batch[i]
+	}
+	checkErr(kv.BatchSet(entries))
+	for _, e := range entries {
+		checkErr(e.Error)
+	}
+	entries = nil // flag the garbage collector that we're finished with this array
+	runtime.GC()
+	time_end := time.Now()
+	diffTime := time_end.Sub(time_start)
+	log.Debug("Sent a batch of", len(batch), "records in", diffTime)
+}
+
 // NOTE(gwyneth): Auxiliary functions which I'm always using...
 
 // checkErrPanic logs a fatal error and panics.