diff options
author | Ayke van Laethem <[email protected]> | 2021-03-12 16:47:08 +0100 |
---|---|---|
committer | Ron Evans <[email protected]> | 2021-04-05 20:52:04 +0200 |
commit | fb03787b73ff62f4dcab767e8f10d19f4e747c8d (patch) | |
tree | aafa4b14bc1b25f928e868bdd3dc3693225b32cd | |
parent | 83a949647f57316ba3de8136ec9793541291d0e1 (diff) | |
download | tinygo-fb03787b73ff62f4dcab767e8f10d19f4e747c8d.tar.gz tinygo-fb03787b73ff62f4dcab767e8f10d19f4e747c8d.zip |
builder: cache C and assembly file outputs
This probably won't speed up the build on multicore systems (the build
is still dominated by the whole-program optimization step) but should be
useful at a later date for other optimizations. For example, I intend to
eventually optimize each package individually including C files, which
should enable cross-language optimizations (inlining C functions into Go
functions, for example). For that to work, accurate dependency tracking
is important.
-rw-r--r-- | Makefile | 2 | ||||
-rw-r--r-- | builder/build.go | 35 | ||||
-rw-r--r-- | builder/cc.go | 302 | ||||
-rw-r--r-- | builder/cc_test.go | 33 |
4 files changed, 349 insertions, 23 deletions
@@ -180,7 +180,7 @@ tinygo: CGO_CPPFLAGS="$(CGO_CPPFLAGS)" CGO_CXXFLAGS="$(CGO_CXXFLAGS)" CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GO) build -buildmode exe -o build/tinygo$(EXE) -tags byollvm -ldflags="-X main.gitSha1=`git rev-parse --short HEAD`" . test: wasi-libc - CGO_CPPFLAGS="$(CGO_CPPFLAGS)" CGO_CXXFLAGS="$(CGO_CXXFLAGS)" CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GO) test -v -buildmode exe -tags byollvm ./cgo ./compileopts ./compiler ./interp ./transform . + CGO_CPPFLAGS="$(CGO_CPPFLAGS)" CGO_CXXFLAGS="$(CGO_CXXFLAGS)" CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GO) test -v -buildmode exe -tags byollvm ./builder ./cgo ./compileopts ./compiler ./interp ./transform . # Test known-working standard library packages. # TODO: do this in one command, parallelize, and only show failing tests (no diff --git a/builder/build.go b/builder/build.go index 1da4d80bf..8b9e9e814 100644 --- a/builder/build.go +++ b/builder/build.go @@ -17,7 +17,6 @@ import ( "path/filepath" "runtime" "sort" - "strconv" "strings" "github.com/tinygo-org/tinygo/compileopts" @@ -426,18 +425,14 @@ func Build(pkgName, outpath string, config *compileopts.Config, action func(Buil // Add jobs to compile extra files. These files are in C or assembly and // contain things like the interrupt vector table and low level operations // such as stack switching. - for i, path := range config.ExtraFiles() { + for _, path := range config.ExtraFiles() { abspath := filepath.Join(root, path) - outpath := filepath.Join(dir, "extra-"+strconv.Itoa(i)+"-"+filepath.Base(path)+".o") job := &compileJob{ description: "compile extra file " + path, - result: outpath, - run: func(*compileJob) error { - err := runCCompiler(config.Target.Compiler, append(config.CFlags(), "-c", "-o", outpath, abspath)...) - if err != nil { - return &commandError{"failed to build", path, err} - } - return nil + run: func(job *compileJob) error { + result, err := compileAndCacheCFile(abspath, dir, config) + job.result = result + return err }, } jobs = append(jobs, job) @@ -447,19 +442,15 @@ func Build(pkgName, outpath string, config *compileopts.Config, action func(Buil // Add jobs to compile C files in all packages. This is part of CGo. // TODO: do this as part of building the package to be able to link the // bitcode files together. - for i, pkg := range lprogram.Sorted() { - for j, filename := range pkg.CFiles { - file := filepath.Join(pkg.Dir, filename) - outpath := filepath.Join(dir, "pkg"+strconv.Itoa(i)+"."+strconv.Itoa(j)+"-"+filepath.Base(file)+".o") + for _, pkg := range lprogram.Sorted() { + for _, filename := range pkg.CFiles { + abspath := filepath.Join(pkg.Dir, filename) job := &compileJob{ - description: "compile CGo file " + file, - result: outpath, - run: func(*compileJob) error { - err := runCCompiler(config.Target.Compiler, append(config.CFlags(), "-c", "-o", outpath, file)...) - if err != nil { - return &commandError{"failed to build", file, err} - } - return nil + description: "compile CGo file " + abspath, + run: func(job *compileJob) error { + result, err := compileAndCacheCFile(abspath, dir, config) + job.result = result + return err }, } jobs = append(jobs, job) diff --git a/builder/cc.go b/builder/cc.go new file mode 100644 index 000000000..89dd7ca50 --- /dev/null +++ b/builder/cc.go @@ -0,0 +1,302 @@ +package builder + +// This file implements a wrapper around the C compiler (Clang) which uses a +// build cache. + +import ( + "crypto/sha512" + "encoding/hex" + "encoding/json" + "errors" + "fmt" + "io" + "io/ioutil" + "os" + "path/filepath" + "sort" + "strings" + "unicode" + + "github.com/tinygo-org/tinygo/compileopts" + "github.com/tinygo-org/tinygo/goenv" + "tinygo.org/x/go-llvm" +) + +// compileAndCacheCFile compiles a C or assembly file using a build cache. +// Compiling the same file again (if nothing changed, including included header +// files) the output is loaded from the build cache instead. +// +// Its operation is a bit complex (more complex than Go package build caching) +// because the list of file dependencies is only known after the file is +// compiled. However, luckily compilers have a flag to write a list of file +// dependencies in Makefile syntax which can be used for caching. +// +// Because of this complexity, every file has in fact two cached build outputs: +// the file itself, and the list of dependencies. Its operation is as follows: +// +// depfile = hash(path, compiler, cflags, ...) +// if depfile exists: +// outfile = hash of all files and depfile name +// if outfile exists: +// # cache hit +// return outfile +// # cache miss +// tmpfile = compile file +// read dependencies (side effect of compile) +// write depfile +// outfile = hash of all files and depfile name +// rename tmpfile to outfile +// +// There are a few edge cases that are not handled: +// - If a file is added to an include path, that file may be included instead of +// some other file. This would be fixed by also including lookup failures in the +// dependencies file, but I'm not aware of a compiler which does that. +// - The Makefile syntax that compilers output has issues, see readDepFile for +// details. +// - A header file may be changed to add/remove an include. This invalidates the +// depfile but without invalidating its name. For this reason, the depfile is +// written on each new compilation (even when it seems unnecessary). However, it +// could in rare cases lead to a stale file fetched from the cache. +func compileAndCacheCFile(abspath, tmpdir string, config *compileopts.Config) (string, error) { + // Hash input file. + fileHash, err := hashFile(abspath) + if err != nil { + return "", err + } + + // Create cache key for the dependencies file. + buf, err := json.Marshal(struct { + Path string + Hash string + Compiler string + Flags []string + LLVMVersion string + }{ + Path: abspath, + Hash: fileHash, + Compiler: config.Target.Compiler, + Flags: config.CFlags(), + LLVMVersion: llvm.Version, + }) + if err != nil { + panic(err) // shouldn't happen + } + depfileNameHashBuf := sha512.Sum512_224(buf) + depfileNameHash := hex.EncodeToString(depfileNameHashBuf[:]) + + // Load dependencies file, if possible. + depfileName := "dep-" + depfileNameHash + ".json" + depfileCachePath := filepath.Join(goenv.Get("GOCACHE"), depfileName) + depfileBuf, err := ioutil.ReadFile(depfileCachePath) + var dependencies []string // sorted list of dependency paths + if err == nil { + // There is a dependency file, that's great! + // Parse it first. + err := json.Unmarshal(depfileBuf, &dependencies) + if err != nil { + return "", fmt.Errorf("could not parse dependencies JSON: %w", err) + } + + // Obtain hashes of all the files listed as a dependency. + outpath, err := makeCFileCachePath(dependencies, depfileNameHash) + if err == nil { + if _, err := os.Stat(outpath); err == nil { + return outpath, nil + } else if !os.IsNotExist(err) { + return "", err + } + } + } else if !os.IsNotExist(err) { + // expected either nil or IsNotExist + return "", err + } + + objTmpFile, err := ioutil.TempFile(goenv.Get("GOCACHE"), "tmp-*.o") + if err != nil { + return "", err + } + objTmpFile.Close() + depTmpFile, err := ioutil.TempFile(tmpdir, "dep-*.d") + if err != nil { + return "", err + } + depTmpFile.Close() + flags := config.CFlags() + flags = append(flags, "-MD", "-MV", "-MTdeps", "-MF", depTmpFile.Name()) // autogenerate dependencies + flags = append(flags, "-c", "-o", objTmpFile.Name(), abspath) + err = runCCompiler(config.Target.Compiler, flags...) + if err != nil { + return "", &commandError{"failed to build", abspath, err} + } + + // Create sorted and uniqued slice of dependencies. + dependencyPaths, err := readDepFile(depTmpFile.Name()) + if err != nil { + return "", err + } + dependencyPaths = append(dependencyPaths, abspath) // necessary for .s files + dependencySet := make(map[string]struct{}, len(dependencyPaths)) + var dependencySlice []string + for _, path := range dependencyPaths { + if _, ok := dependencySet[path]; ok { + continue + } + dependencySet[path] = struct{}{} + dependencySlice = append(dependencySlice, path) + } + sort.Strings(dependencySlice) + + // Write dependencies file. + f, err := ioutil.TempFile(filepath.Dir(depfileCachePath), depfileName) + buf, err = json.MarshalIndent(dependencySlice, "", "\t") + if err != nil { + panic(err) // shouldn't happen + } + _, err = f.Write(buf) + if err != nil { + return "", err + } + err = f.Close() + if err != nil { + return "", err + } + err = os.Rename(f.Name(), depfileCachePath) + if err != nil { + return "", err + } + + // Move temporary object file to final location. + outpath, err := makeCFileCachePath(dependencySlice, depfileNameHash) + if err != nil { + return "", err + } + err = os.Rename(objTmpFile.Name(), outpath) + if err != nil { + return "", err + } + + return outpath, nil +} + +// Create a cache path (a path in GOCACHE) to store the output of a compiler +// job. This path is based on the dep file name (which is a hash of metadata +// including compiler flags) and the hash of all input files in the paths slice. +func makeCFileCachePath(paths []string, depfileNameHash string) (string, error) { + // Hash all input files. + fileHashes := make(map[string]string, len(paths)) + for _, path := range paths { + hash, err := hashFile(path) + if err != nil { + return "", err + } + fileHashes[path] = hash + } + + // Calculate a cache key based on the above hashes. + buf, err := json.Marshal(struct { + DepfileHash string + FileHashes map[string]string + }{ + DepfileHash: depfileNameHash, + FileHashes: fileHashes, + }) + if err != nil { + panic(err) // shouldn't happen + } + outFileNameBuf := sha512.Sum512_224(buf) + cacheKey := hex.EncodeToString(outFileNameBuf[:]) + + outpath := filepath.Join(goenv.Get("GOCACHE"), "obj-"+cacheKey+".o") + return outpath, nil +} + +// hashFile hashes the given file path and returns the hash as a hex string. +func hashFile(path string) (string, error) { + f, err := os.Open(path) + if err != nil { + return "", fmt.Errorf("failed to hash file: %w", err) + } + defer f.Close() + fileHasher := sha512.New512_224() + _, err = io.Copy(fileHasher, f) + if err != nil { + return "", fmt.Errorf("failed to hash file: %w", err) + } + return hex.EncodeToString(fileHasher.Sum(nil)), nil +} + +// readDepFile reads a dependency file in NMake (Visual Studio make) format. The +// file is assumed to have a single target named deps. +// +// There are roughly three make syntax variants: +// - BSD make, which doesn't support any escaping. This means that many special +// characters are not supported in file names. +// - GNU make, which supports escaping using a backslash but when it fails to +// find a file it tries to fall back with the literal path name (to match BSD +// make). +// - NMake (Visual Studio) and Jom, which simply quote the string if there are +// any weird characters. +// Clang supports two variants: a format that's a compromise between BSD and GNU +// make (and is buggy to match GCC which is equally buggy), and NMake/Jom, which +// is at least somewhat sane. This last format isn't perfect either: it does not +// correctly handle filenames with quote marks in them. Those are generally not +// allowed on Windows, but of course can be used on POSIX like systems. Still, +// it's the most sane of any of the formats so readDepFile will use that format. +func readDepFile(filename string) ([]string, error) { + buf, err := ioutil.ReadFile(filename) + if err != nil { + return nil, err + } + if len(buf) == 0 { + return nil, nil + } + return parseDepFile(string(buf)) +} + +func parseDepFile(s string) ([]string, error) { + // This function makes no attempt at parsing anything other than Clang -MD + // -MV output. + + // For Windows: replace CRLF with LF to make the logic below simpler. + s = strings.ReplaceAll(s, "\r\n", "\n") + + // Collapse all lines ending in a backslash. These backslashes are really + // just a way to continue a line without making very long lines. + s = strings.ReplaceAll(s, "\\\n", " ") + + // Only use the first line, which is expected to begin with "deps:". + line := strings.SplitN(s, "\n", 2)[0] + if !strings.HasPrefix(line, "deps:") { + return nil, errors.New("readDepFile: expected 'deps:' prefix") + } + line = strings.TrimSpace(line[len("deps:"):]) + + var deps []string + for line != "" { + if line[0] == '"' { + // File path is quoted. Path ends with double quote. + // This does not handle double quotes in path names, which is a + // problem on non-Windows systems. + line = line[1:] + end := strings.IndexByte(line, '"') + if end < 0 { + return nil, errors.New("readDepFile: path is incorrectly quoted") + } + dep := line[:end] + line = strings.TrimSpace(line[end+1:]) + deps = append(deps, dep) + } else { + // File path is not quoted. Path ends in space or EOL. + end := strings.IndexFunc(line, unicode.IsSpace) + if end < 0 { + // last dependency + deps = append(deps, line) + break + } + dep := line[:end] + line = strings.TrimSpace(line[end:]) + deps = append(deps, dep) + } + } + return deps, nil +} diff --git a/builder/cc_test.go b/builder/cc_test.go new file mode 100644 index 000000000..085528060 --- /dev/null +++ b/builder/cc_test.go @@ -0,0 +1,33 @@ +package builder + +import ( + "reflect" + "testing" +) + +func TestSplitDepFile(t *testing.T) { + for i, tc := range []struct { + in string + out []string + }{ + {`deps: foo bar`, []string{"foo", "bar"}}, + {`deps: foo "bar"`, []string{"foo", "bar"}}, + {`deps: "foo" bar`, []string{"foo", "bar"}}, + {`deps: "foo bar"`, []string{"foo bar"}}, + {`deps: "foo bar" `, []string{"foo bar"}}, + {"deps: foo\nbar", []string{"foo"}}, + {"deps: foo \\\nbar", []string{"foo", "bar"}}, + {"deps: foo\\bar \\\nbaz", []string{"foo\\bar", "baz"}}, + {"deps: foo\\bar \\\r\n baz", []string{"foo\\bar", "baz"}}, // Windows uses CRLF line endings + } { + out, err := parseDepFile(tc.in) + if err != nil { + t.Errorf("test #%d failed: %v", i, err) + continue + } + if !reflect.DeepEqual(out, tc.out) { + t.Errorf("test #%d failed: expected %#v but got %#v", i, tc.out, out) + continue + } + } +} |