Skip to content

Commit

Permalink
Compress the languages data (#468)
Browse files Browse the repository at this point in the history
There is a lot of repeated content in languages.json, so using
gzip compression can significantly reduce the size of the compiled
binary file.

On my Windows machine with Go 1.22, the results are:

- "processor/constants.go": 170k -> 20k (150k smaller).
- scc binary file: 8909k -> 8836.5k (72.5k smaller).

Gzip is very fast; hyperfine reports almost no impact on
performance.
  • Loading branch information
apocelipes committed May 27, 2024
1 parent 624d6c5 commit 4144d52
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 4 deletions.
2 changes: 1 addition & 1 deletion processor/constants.go

Large diffs are not rendered by default.

10 changes: 8 additions & 2 deletions processor/processor.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
package processor

import (
"bytes"
"compress/gzip"
"encoding/base64"
"fmt"
"os"
Expand Down Expand Up @@ -504,13 +506,17 @@ func loadDatabase() map[string]Language {
var database map[string]Language
startTime := makeTimestampMilli()

data, err := base64.StdEncoding.DecodeString(languages)
gzData, err := base64.StdEncoding.DecodeString(languages)
if err != nil {
panic(fmt.Sprintf("failed to base64 decode languages: %v", err))
}
dataReader, err := gzip.NewReader(bytes.NewReader(gzData))
if err != nil {
panic(fmt.Sprintf("failed to create gzip reader: %v", err))
}

var json = jsoniter.ConfigCompatibleWithStandardLibrary
if err := json.Unmarshal(data, &database); err != nil {
if err := json.NewDecoder(dataReader).Decode(&database); err != nil {
panic(fmt.Sprintf("languages json invalid: %v", err))
}

Expand Down
5 changes: 4 additions & 1 deletion scripts/include.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
package main

import (
"compress/gzip"
"encoding/base64"
"encoding/json"
"fmt"
Expand Down Expand Up @@ -50,9 +51,11 @@ func generateConstants() error {
out.Write([]byte(strings.TrimSuffix(f.Name(), ".json") + " = `"))

enc := base64.NewEncoder(base64.StdEncoding, out)
if _, err := io.Copy(enc, f); err != nil {
gz, _ := gzip.NewWriterLevel(enc, gzip.BestSpeed)
if _, err := io.Copy(gz, f); err != nil {
return fmt.Errorf("failed to encode file '%s': %v", f.Name(), err)
}
gz.Close()
enc.Close()

out.Write([]byte("`\n"))
Expand Down

0 comments on commit 4144d52

Please sign in to comment.