From bd36ac48bd6a0f9e975242e86d724a337d91a060 Mon Sep 17 00:00:00 2001 From: laurentheirendt Date: Tue, 26 Jan 2021 16:03:15 +0100 Subject: [PATCH 1/8] build stable version of docs on master --- .github/workflows/docs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 5c48c91..ac7e626 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -4,9 +4,9 @@ name: Documentation on: push: branches: + - master - develop tags: '*' - pull_request: release: types: [published, created] From d25cce7fa799c33589bb7bbdc55af5f8d464410b Mon Sep 17 00:00:00 2001 From: laurentheirendt Date: Tue, 26 Jan 2021 16:03:46 +0100 Subject: [PATCH 2/8] bump version --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 7f0a236..7029b23 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "DiDa" uuid = "f6a0035f-c5ac-4ad0-b410-ad102ced35df" authors = ["Mirek Kratochvil "] -version = "0.1.0" +version = "0.1.1" [deps] Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b" From 160edcca10db4161d3609192f0b18dff384f1f08 Mon Sep 17 00:00:00 2001 From: Mirek Kratochvil Date: Wed, 27 Jan 2021 09:26:52 +0100 Subject: [PATCH 3/8] massively rename DiDa to DistributedData --- Project.toml | 2 +- README.md | 18 ++++++------------ docs/make.jl | 10 +++++----- docs/src/functions.md | 8 ++++---- docs/src/index.md | 2 +- docs/src/tutorial.md | 20 ++++++++++---------- src/{DiDa.jl => DistributedData.jl} | 2 +- test/base.jl | 10 +++++----- test/runtests.jl | 4 ++-- test/tools.jl | 2 +- 10 files changed, 36 insertions(+), 42 deletions(-) rename src/{DiDa.jl => DistributedData.jl} (95%) diff --git a/Project.toml b/Project.toml index 7f0a236..c099bb1 100644 --- a/Project.toml +++ b/Project.toml @@ -1,4 +1,4 @@ -name = "DiDa" +name = "DistributedData" uuid = "f6a0035f-c5ac-4ad0-b410-ad102ced35df" authors = ["Mirek Kratochvil "] version = "0.1.0" diff --git a/README.md b/README.md index 3b6ac39..9c6fd6d 100644 --- a/README.md +++ b/README.md @@ -1,20 +1,20 @@ -# DiDa.jl logo DiDa.jl +# DistributedData.jl logo DistributedData.jl | Build status | Documentation | |:---:|:---:| -| ![CI](https://github.com/LCSB-BioCore/DiDa.jl/workflows/CI/badge.svg?branch=develop) | [![doc](https://img.shields.io/badge/docs-stable-blue)](https://lcsb-biocore.github.io/DiDa.jl/stable/) [![doc](https://img.shields.io/badge/docs-dev-blue)](https://lcsb-biocore.github.io/DiDa.jl/dev/) | +| ![CI](https://github.com/LCSB-BioCore/DistributedData.jl/workflows/CI/badge.svg?branch=develop) | [![doc](https://img.shields.io/badge/docs-stable-blue)](https://lcsb-biocore.github.io/DistributedData.jl/stable/) [![doc](https://img.shields.io/badge/docs-dev-blue)](https://lcsb-biocore.github.io/DistributedData.jl/dev/) | Simple distributed data manipulation and processing routines for Julia. This was originally developed for -[`GigaSOM.jl`](https://github.com/LCSB-BioCore/GigaSOM.jl); DiDa.jl package +[`GigaSOM.jl`](https://github.com/LCSB-BioCore/GigaSOM.jl); DistributedData.jl package contains the separated-out lightweight distributed-processing framework that was used in `GigaSOM.jl`. ## Why? -DiDa.jl provides a very simple, imperative and straightforward way to move your +DistributedData.jl provides a very simple, imperative and straightforward way to move your data around a cluster of Julia processes created by the [`Distributed`](https://docs.julialang.org/en/v1/stdlib/Distributed/) package, and run computation on the distributed data pieces. The main aim of the package @@ -43,14 +43,14 @@ same way, but takes the data back from the worker. You can thus send some random array to a few distributed workers: ```julia -julia> using Distributed, DiDa +julia> using Distributed, DistributedData julia> addprocs(2) 2-element Array{Int64,1}: 2 3 -julia> @everywhere using DiDa +julia> @everywhere using DistributedData julia> save_at(2, :x, randn(10,10)) Future(2, 1, 4, nothing) @@ -132,9 +132,3 @@ julia> gather_array(dataset) # download the data from workers to a sing 0.610183 1.12165 0.722438 ⋮ ``` - -## What does the name `DiDa` mean? - -**Di**stributed **Da**ta. - -There is no consensus on how to pronounce the shortcut. diff --git a/docs/make.jl b/docs/make.jl index 77ab4a1..3422fe6 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -1,10 +1,10 @@ -using Documenter, DiDa +using Documenter, DistributedData -makedocs(modules = [DiDa], +makedocs(modules = [DistributedData], clean = false, format = Documenter.HTML(prettyurls = !("local" in ARGS)), - sitename = "DiDa.jl", - authors = "The developers of DiDa.jl", + sitename = "DistributedData.jl", + authors = "The developers of DistributedData.jl", linkcheck = !("skiplinks" in ARGS), pages = [ "Documentation" => "index.md", @@ -14,7 +14,7 @@ makedocs(modules = [DiDa], ) deploydocs( - repo = "github.com/LCSB-BioCore/DiDa.jl.git", + repo = "github.com/LCSB-BioCore/DistributedData.jl.git", target = "build", branch = "gh-pages", devbranch = "develop", diff --git a/docs/src/functions.md b/docs/src/functions.md index f5cb708..af595e3 100644 --- a/docs/src/functions.md +++ b/docs/src/functions.md @@ -3,27 +3,27 @@ ## Data structures ```@autodocs -Modules = [DiDa] +Modules = [DistributedData] Pages = ["structs.jl"] ``` ## Base functions ```@autodocs -Modules = [DiDa] +Modules = [DistributedData] Pages = ["base.jl"] ``` ## Higher-level array operations ```@autodocs -Modules = [DiDa] +Modules = [DistributedData] Pages = ["tools.jl"] ``` ## Input/Output ```@autodocs -Modules = [DiDa] +Modules = [DistributedData] Pages = ["io.jl"] ``` diff --git a/docs/src/index.md b/docs/src/index.md index 2cc0dc6..05c6ec5 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -1,5 +1,5 @@ -# DiDa.jl — simple work with distributed data +# DistributedData.jl — simple work with distributed data This packages provides simple Distributed Data manipulation and processing routines for Julia. diff --git a/docs/src/tutorial.md b/docs/src/tutorial.md index 4009942..b59a1dc 100644 --- a/docs/src/tutorial.md +++ b/docs/src/tutorial.md @@ -1,13 +1,13 @@ -# DiDa tutorial +# DistributedData tutorial -The primary purpose of this tutorial is to get a basic grasp of the main `DiDa` +The primary purpose of this tutorial is to get a basic grasp of the main `DistributedData` functions and methodology. For starting up, let's create a few distributed workers and import the package: ```julia -julia> using Distributed, DiDa +julia> using Distributed, DistributedData julia> addprocs(3) 2-element Array{Int64,1}: @@ -15,12 +15,12 @@ julia> addprocs(3) 3 4 -julia> @everywhere using DiDa +julia> @everywhere using DistributedData ``` ## Moving the data around -In `DiDa`, the storage of distributed data is done in the "native" Julia way -- +In `DistributedData`, the storage of distributed data is done in the "native" Julia way -- the data is stored in normal named variables. Each node holds its own data in an arbitrary set of variables as "plain data"; content of these variables is completely independent among nodes. @@ -52,7 +52,7 @@ UndefValError: x not defined … ``` -`DiDa` uses *quoting* to allow you to precisely specify the parts of the code +`DistributedData` uses *quoting* to allow you to precisely specify the parts of the code that should be evaluated on the "main" Julia process (the one you interact with), and the code that should be evaluated on the remote workers. Basically, all quoted code is going to get to the workers without any evaluation; all @@ -192,7 +192,7 @@ beneficial for implementing advanced parallel algorithms. Remembering and managing the remote variable names and worker numbers is extremely impractical, especially if you need to maintain multiple variables on -various subsets of all available workers at once. `DiDa` defines a small +various subsets of all available workers at once. `DistributedData` defines a small [`Dinfo`](@ref) data structure that keeps that information for you. Many other functions are able to work with `Dinfo` transparently, instead of the "raw" symbols and worker lists. @@ -354,7 +354,7 @@ julia> dmap(Vector(1:length(workers())), ## Persisting the data -`DiDa` provides support for storing the loaded dataset in each worker's local +`DistributedData` provides support for storing the loaded dataset in each worker's local storage. This is quite beneficial for saving sub-results and various artifacts of the computation process for later use, without unnecessarily wasting main memory. @@ -378,9 +378,9 @@ significant overhead. ## Miscellaneous functions -For convenience, `DiDa` also contains simple implementations of various common +For convenience, `DistributedData` also contains simple implementations of various common utility operations for processing matrix data. These originated in -flow-cytometry use-cases (which is what `DiDa` was originally built for), but +flow-cytometry use-cases (which is what `DistributedData` was originally built for), but are applicable in many other areas of data analysis: - [`dselect`](@ref) reduces a matrix to several selected columns (in a diff --git a/src/DiDa.jl b/src/DistributedData.jl similarity index 95% rename from src/DiDa.jl rename to src/DistributedData.jl index f1d29b4..d339534 100644 --- a/src/DiDa.jl +++ b/src/DistributedData.jl @@ -1,4 +1,4 @@ -module DiDa +module DistributedData using Distributed using Serialization diff --git a/test/base.jl b/test/base.jl index 107aa63..adf0479 100644 --- a/test/base.jl +++ b/test/base.jl @@ -14,7 +14,7 @@ end addprocs(3) - @everywhere using DiDa + @everywhere using DistributedData W = workers() @testset "Distributed data transfers -- with workers" begin @@ -86,16 +86,16 @@ end @testset "Internal utilities" begin - @test DiDa.tmp_symbol(:test) != :test - @test DiDa.tmp_symbol(:test, prefix = "abc", + @test DistributedData.tmp_symbol(:test) != :test + @test DistributedData.tmp_symbol(:test, prefix = "abc", suffix = "def") == :abctestdef - @test DiDa.tmp_symbol(Dinfo(:test, W)) != :test + @test DistributedData.tmp_symbol(Dinfo(:test, W)) != :test end @testset "Persistent distributed data" begin di = dtransform(:(), x -> rand(5), W, :test) - files = DiDa.defaultFiles(di.val, di.workers) + files = DistributedData.defaultFiles(di.val, di.workers) @test allunique(files) orig = gather_array(di) diff --git a/test/runtests.jl b/test/runtests.jl index 61d0fd0..4441661 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,8 +1,8 @@ using Test -using DiDa, Distributed, Random +using DistributedData, Distributed, Random -@testset "DiDa tests" begin +@testset "DistributedData tests" begin include("base.jl") include("tools.jl") end diff --git a/test/tools.jl b/test/tools.jl index 55336b3..3ff879c 100644 --- a/test/tools.jl +++ b/test/tools.jl @@ -2,7 +2,7 @@ @testset "High-level tools" begin W = addprocs(2) - @everywhere using DiDa + @everywhere using DistributedData Random.seed!(1) dd = rand(11111, 5) From 0ea506d5cc4c7f042b305e56964c099b380b4c4b Mon Sep 17 00:00:00 2001 From: laurentheirendt Date: Wed, 27 Jan 2021 10:19:52 +0100 Subject: [PATCH 4/8] add codecov file --- codecov.yml | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 codecov.yml diff --git a/codecov.yml b/codecov.yml new file mode 100644 index 0000000..cbfea86 --- /dev/null +++ b/codecov.yml @@ -0,0 +1,6 @@ +coverage: + status: + project: + default: + threshold: 15 + patch: off From 7895a4c156466f7932d84c0612a489bd1913e8e3 Mon Sep 17 00:00:00 2001 From: laurentheirendt Date: Wed, 27 Jan 2021 10:22:22 +0100 Subject: [PATCH 5/8] trigger codecov --- trigger-cc | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 trigger-cc diff --git a/trigger-cc b/trigger-cc new file mode 100644 index 0000000..e69de29 From 3f3e6cb70be8f85b05c471ac163dc536f86180d3 Mon Sep 17 00:00:00 2001 From: laurentheirendt Date: Wed, 27 Jan 2021 10:22:26 +0100 Subject: [PATCH 6/8] Revert "trigger codecov" This reverts commit 7895a4c156466f7932d84c0612a489bd1913e8e3. --- trigger-cc | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 trigger-cc diff --git a/trigger-cc b/trigger-cc deleted file mode 100644 index e69de29..0000000 From 0da157fa8f30643fdb0aab8c25e5d53f06642aa5 Mon Sep 17 00:00:00 2001 From: laurentheirendt Date: Wed, 27 Jan 2021 10:32:34 +0100 Subject: [PATCH 7/8] change version nb for first pkg registration --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index d6d192d..c099bb1 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "DistributedData" uuid = "f6a0035f-c5ac-4ad0-b410-ad102ced35df" authors = ["Mirek Kratochvil "] -version = "0.1.1" +version = "0.1.0" [deps] Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b" From af153f254b17d352fe039b698f71fb45035dd58c Mon Sep 17 00:00:00 2001 From: Mirek Kratochvil Date: Wed, 27 Jan 2021 10:39:12 +0100 Subject: [PATCH 8/8] update the contact information --- Project.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index d6d192d..c6d6359 100644 --- a/Project.toml +++ b/Project.toml @@ -1,6 +1,7 @@ name = "DistributedData" uuid = "f6a0035f-c5ac-4ad0-b410-ad102ced35df" -authors = ["Mirek Kratochvil "] +authors = ["Mirek Kratochvil ", + "LCSB R3 team "] version = "0.1.1" [deps]