Skip to content

Commit

Permalink
Enable offline install of labs projects (#2049)
Browse files Browse the repository at this point in the history
## Changes
<!-- Summary of your changes that are easy to understand -->
This PR changes the labs code base to allow offline installation of
labs projects (like UCX). When the flag `--offline=true` is passed, the
code skips checking for project versions and downloading code from
GitHub, and instead looks in the local installation folder. This
command is useful on systems with restricted internet access. The user
should follow these set-up steps:

- install a labs project on a machine which has internet access
- zip the installation and copy the file to the intended machine, and
- run `databricks labs install <project name> --offline=true`

The installer will look for the code in the same install directory and,
if it is present, load it from there.


Closes #1646 

related to databrickslabs/ucx#3418
## Tests
<!-- How is this tested? -->

Added unit test case and tested.

NO_CHANGELOG=true

---------

Signed-off-by: dependabot[bot] <[email protected]>
Co-authored-by: Pieter Noordhuis <[email protected]>
Co-authored-by: Lennart Kats (databricks) <[email protected]>
Co-authored-by: Denis Bilenko <[email protected]>
Co-authored-by: Julia Crawford (Databricks) <[email protected]>
Co-authored-by: Ilya Kuznetsov <[email protected]>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Andrew Nester <[email protected]>
Co-authored-by: Anton Nekipelov <[email protected]>
Co-authored-by: shreyas-goenka <[email protected]>
  • Loading branch information
10 people authored Mar 10, 2025
1 parent b5a7023 commit fa0a734
Show file tree
Hide file tree
Showing 8 changed files with 116 additions and 43 deletions.
26 changes: 16 additions & 10 deletions cmd/labs/github/releases.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,25 +13,31 @@ const cacheTTL = 1 * time.Hour

// NewReleaseCache creates a release cache for a repository in the GitHub org.
// Caller has to provide different cache directories for different repositories.
func NewReleaseCache(org, repo, cacheDir string) *ReleaseCache {
func NewReleaseCache(org, repo, cacheDir string, offlineInstall bool) *ReleaseCache {
pattern := fmt.Sprintf("%s-%s-releases", org, repo)
return &ReleaseCache{
cache: localcache.NewLocalCache[Versions](cacheDir, pattern, cacheTTL),
Org: org,
Repo: repo,
cache: localcache.NewLocalCache[Versions](cacheDir, pattern, cacheTTL),
Org: org,
Repo: repo,
Offline: offlineInstall,
}
}

// ReleaseCache gives access to the release versions of a single GitHub
// repository through a local, file-backed cache.
type ReleaseCache struct {
	// cache is the local cache holding the repository's Versions.
	cache localcache.LocalCache[Versions]
	// Org is the GitHub organization that owns the repository.
	Org string
	// Repo is the repository name within Org.
	Repo string
	// Offline makes Load read only the local cache instead of going
	// through the refreshing cache path that may call GitHub.
	Offline bool
}

// Load returns the release versions known for the repository.
//
// Unless Offline is set, the lookup goes through the local cache, which is
// given getVersions as its refresh callback. When Offline is set, only the
// cache file is read via LoadCache and the refresh callback is never used;
// if the cache cannot be loaded, the error is returned to the caller.
func (r *ReleaseCache) Load(ctx context.Context) (Versions, error) {
	if r.Offline {
		cached, err := r.cache.LoadCache()
		if err != nil {
			// cached must not be dereferenced when loading failed:
			// it may be nil, and touching cached.Data would panic.
			var none Versions
			return none, err
		}
		return cached.Data, nil
	}
	return r.cache.Load(ctx, func() (Versions, error) {
		return getVersions(ctx, r.Org, r.Repo)
	})
}

// getVersions is considered to be a private API, as we want the usage go through a cache
Expand Down
2 changes: 1 addition & 1 deletion cmd/labs/github/releases_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ func TestLoadsReleasesForCLI(t *testing.T) {
ctx := context.Background()
ctx = WithApiOverride(ctx, server.URL)

r := NewReleaseCache("databricks", "cli", t.TempDir())
r := NewReleaseCache("databricks", "cli", t.TempDir(), false)
all, err := r.Load(ctx)
assert.NoError(t, err)
assert.Len(t, all, 2)
Expand Down
26 changes: 15 additions & 11 deletions cmd/labs/install.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,20 @@ import (
)

func newInstallCommand() *cobra.Command {
return &cobra.Command{
Use: "install NAME",
Args: root.ExactArgs(1),
Short: "Installs project",
RunE: func(cmd *cobra.Command, args []string) error {
inst, err := project.NewInstaller(cmd, args[0])
if err != nil {
return err
}
return inst.Install(cmd.Context())
},
cmd := &cobra.Command{}
var offlineInstall bool

cmd.Flags().BoolVar(&offlineInstall, "offline", offlineInstall, `If installing in offline mode, set this flag to true.`)

cmd.Use = "install NAME"
cmd.Args = root.ExactArgs(1)
cmd.Short = "Installs project"
cmd.RunE = func(cmd *cobra.Command, args []string) error {
inst, err := project.NewInstaller(cmd, args[0], offlineInstall)
if err != nil {
return err
}
return inst.Install(cmd.Context())
}
return cmd
}
4 changes: 2 additions & 2 deletions cmd/labs/localcache/jsonfile.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ type LocalCache[T any] struct {
}

func (r *LocalCache[T]) Load(ctx context.Context, refresh func() (T, error)) (T, error) {
cached, err := r.loadCache()
cached, err := r.LoadCache()
if errors.Is(err, fs.ErrNotExist) {
return r.refreshCache(ctx, refresh, r.zero)
} else if err != nil {
Expand Down Expand Up @@ -96,7 +96,7 @@ func (r *LocalCache[T]) FileName() string {
return filepath.Join(r.dir, r.name+".json")
}

func (r *LocalCache[T]) loadCache() (*cached[T], error) {
func (r *LocalCache[T]) LoadCache() (*cached[T], error) {
jsonFile := r.FileName()
raw, err := os.ReadFile(r.FileName())
if err != nil {
Expand Down
45 changes: 31 additions & 14 deletions cmd/labs/project/fetcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ func (d *devInstallation) Install(ctx context.Context) error {
return d.Installer.runHook(d.Command)
}

func NewInstaller(cmd *cobra.Command, name string) (installable, error) {
func NewInstaller(cmd *cobra.Command, name string, offlineInstall bool) (installable, error) {
if name == "." {
wd, err := os.Getwd()
if err != nil {
Expand All @@ -75,28 +75,32 @@ func NewInstaller(cmd *cobra.Command, name string) (installable, error) {
version = "latest"
}
f := &fetcher{name}
version, err := f.checkReleasedVersions(cmd, version)

version, err := f.checkReleasedVersions(cmd, version, offlineInstall)
if err != nil {
return nil, fmt.Errorf("version: %w", err)
}
prj, err := f.loadRemoteProjectDefinition(cmd, version)

prj, err := f.loadRemoteProjectDefinition(cmd, version, offlineInstall)
if err != nil {
return nil, fmt.Errorf("remote: %w", err)
}

return &installer{
Project: prj,
version: version,
cmd: cmd,
Project: prj,
version: version,
cmd: cmd,
offlineInstall: offlineInstall,
}, nil
}

func NewUpgrader(cmd *cobra.Command, name string) (*installer, error) {
f := &fetcher{name}
version, err := f.checkReleasedVersions(cmd, "latest")
version, err := f.checkReleasedVersions(cmd, "latest", false)
if err != nil {
return nil, fmt.Errorf("version: %w", err)
}
prj, err := f.loadRemoteProjectDefinition(cmd, version)
prj, err := f.loadRemoteProjectDefinition(cmd, version, false)
if err != nil {
return nil, fmt.Errorf("remote: %w", err)
}
Expand All @@ -115,15 +119,16 @@ type fetcher struct {
name string
}

func (f *fetcher) checkReleasedVersions(cmd *cobra.Command, version string) (string, error) {
func (f *fetcher) checkReleasedVersions(cmd *cobra.Command, version string, offlineInstall bool) (string, error) {
ctx := cmd.Context()
cacheDir, err := PathInLabs(ctx, f.name, "cache")
if err != nil {
return "", err
}
// `databricks labs install X` doesn't know which exact version to fetch, so first
// we fetch all versions and then pick the latest one dynamically.
versions, err := github.NewReleaseCache("databrickslabs", f.name, cacheDir).Load(ctx)
var versions github.Versions
versions, err = github.NewReleaseCache("databrickslabs", f.name, cacheDir, offlineInstall).Load(ctx)
if err != nil {
return "", fmt.Errorf("versions: %w", err)
}
Expand All @@ -140,11 +145,23 @@ func (f *fetcher) checkReleasedVersions(cmd *cobra.Command, version string) (str
return version, nil
}

func (i *fetcher) loadRemoteProjectDefinition(cmd *cobra.Command, version string) (*Project, error) {
func (i *fetcher) loadRemoteProjectDefinition(cmd *cobra.Command, version string, offlineInstall bool) (*Project, error) {
ctx := cmd.Context()
raw, err := github.ReadFileFromRef(ctx, "databrickslabs", i.name, version, "labs.yml")
if err != nil {
return nil, fmt.Errorf("read labs.yml from GitHub: %w", err)
var raw []byte
var err error
if !offlineInstall {
raw, err = github.ReadFileFromRef(ctx, "databrickslabs", i.name, version, "labs.yml")
if err != nil {
return nil, fmt.Errorf("read labs.yml from GitHub: %w", err)
}
} else {
libDir, _ := PathInLabs(ctx, i.name, "lib")
fileName := filepath.Join(libDir, "labs.yml")
raw, err = os.ReadFile(fileName)
if err != nil {
return nil, fmt.Errorf("read labs.yml from local path %s: %w", libDir, err)
}
}

return readFromBytes(ctx, raw)
}
15 changes: 11 additions & 4 deletions cmd/labs/project/installer.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,8 @@ type installer struct {
// command instance is used for:
// - auth profile flag override
// - standard input, output, and error streams
cmd *cobra.Command
cmd *cobra.Command
offlineInstall bool
}

func (i *installer) Install(ctx context.Context) error {
Expand All @@ -101,9 +102,15 @@ func (i *installer) Install(ctx context.Context) error {
} else if err != nil {
return fmt.Errorf("login: %w", err)
}
err = i.downloadLibrary(ctx)
if err != nil {
return fmt.Errorf("lib: %w", err)
if !i.offlineInstall {
err = i.downloadLibrary(ctx)
if err != nil {
return fmt.Errorf("lib: %w", err)
}
}

if _, err := os.Stat(i.LibDir()); os.IsNotExist(err) {
return fmt.Errorf("no local installation found: %w", err)
}
err = i.setupPythonVirtualEnvironment(ctx, w)
if err != nil {
Expand Down
39 changes: 39 additions & 0 deletions cmd/labs/project/installer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,45 @@ func TestInstallerWorksForReleases(t *testing.T) {
r.RunAndExpectOutput("setting up important infrastructure")
}

// TestOfflineInstallerWorksForReleases runs `labs install blueprint --offline=true`
// against a home directory copied from testdata/installed-in-home and expects
// the install hook output to be printed.
func TestOfflineInstallerWorksForReleases(t *testing.T) {
	// Offline mode is meant for systems with restricted internet access.
	// The intended set-up is:
	//   - install a labs project on a machine which has internet access
	//   - zip the installation and copy it to the intended machine
	//   - run `databricks labs install <project name> --offline=true`
	// The installer then looks for the code in the same install directory
	// and, if present, installs from there.
	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		if r.URL.Path == "/api/2.1/clusters/get" {
			respondWithJSON(t, w, &compute.ClusterDetails{
				State: compute.StateRunning,
			})
			return
		}
		// Any other request to the stub workspace is unexpected. The handler
		// runs on a server goroutine, where t.FailNow must not be called, so
		// mark the test as failed with t.Errorf and answer with a 404.
		t.Errorf("Requested: %s", r.URL.Path)
		http.NotFound(w, r)
	}))
	defer server.Close()

	ctx := installerContext(t, server)
	newHome := copyTestdata(t, "testdata/installed-in-home")
	ctx = env.WithUserHomeDir(ctx, newHome)

	ctx, stub := process.WithStub(ctx)
	stub.WithStdoutFor(`python[\S]+ --version`, "Python 3.10.5")
	// on Unix, we call `python3`, but on Windows it is `python.exe`
	stub.WithStderrFor(`python[\S]+ -m venv .*/.databricks/labs/blueprint/state/venv`, "[mock venv create]")
	stub.WithStderrFor(`python[\S]+ -m pip install --upgrade --upgrade-strategy eager .`, "[mock pip install]")
	stub.WithStdoutFor(`python[\S]+ install.py`, "setting up important infrastructure")

	// simulate the case of GitHub Actions
	ctx = env.Set(ctx, "DATABRICKS_HOST", server.URL)
	ctx = env.Set(ctx, "DATABRICKS_TOKEN", "...")
	ctx = env.Set(ctx, "DATABRICKS_CLUSTER_ID", "installer-cluster")
	ctx = env.Set(ctx, "DATABRICKS_WAREHOUSE_ID", "installer-warehouse")

	r := testcli.NewRunner(t, ctx, "labs", "install", "blueprint", "--offline=true", "--debug")
	r.RunAndExpectOutput("setting up important infrastructure")
}

func TestInstallerWorksForDevelopment(t *testing.T) {
defer func() {
if !t.Failed() {
Expand Down
2 changes: 1 addition & 1 deletion cmd/labs/project/project.go
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,7 @@ func (p *Project) checkUpdates(cmd *cobra.Command) error {
// might not be installed yet
return nil
}
r := github.NewReleaseCache("databrickslabs", p.Name, p.CacheDir())
r := github.NewReleaseCache("databrickslabs", p.Name, p.CacheDir(), false)
versions, err := r.Load(ctx)
if err != nil {
return err
Expand Down

0 comments on commit fa0a734

Please sign in to comment.