Merge pull request #12 from LCSB-BioCore/develop

laurentheirendt · web-flow · commit 24a6fc34c540 · 2021-01-27T10:59:35.000+01:00
Develop
diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
@@ -4,9 +4,9 @@ name: Documentation
 on:
   push:
     branches:
+      - master
       - develop
     tags: '*'
-  pull_request:
   release:
     types: [published, created]
 
diff --git a/Project.toml b/Project.toml
@@ -1,6 +1,7 @@
-name = "DiDa"
+name = "DistributedData"
 uuid = "f6a0035f-c5ac-4ad0-b410-ad102ced35df"
-authors = ["Mirek Kratochvil <exa.exa@gmail.com>"]
+authors = ["Mirek Kratochvil <miroslav.kratochvil@uni.lu>",
+           "LCSB R3 team <lcsb-r3@uni.lu>"]
 version = "0.1.0"
 
 [deps]
diff --git a/README.md b/README.md
@@ -1,20 +1,20 @@
-# <img src="docs/src/assets/logo.svg" alt="DiDa.jl logo" height="32px"> DiDa.jl
+# <img src="docs/src/assets/logo.svg" alt="DistributedData.jl logo" height="32px"> DistributedData.jl
 
 
 | Build status | Documentation |
 |:---:|:---:|
-| ![CI](https://github.com/LCSB-BioCore/DiDa.jl/workflows/CI/badge.svg?branch=develop) | [![doc](https://img.shields.io/badge/docs-stable-blue)](https://lcsb-biocore.github.io/DiDa.jl/stable/) [![doc](https://img.shields.io/badge/docs-dev-blue)](https://lcsb-biocore.github.io/DiDa.jl/dev/) |
+| ![CI](https://github.com/LCSB-BioCore/DistributedData.jl/workflows/CI/badge.svg?branch=develop) | [![doc](https://img.shields.io/badge/docs-stable-blue)](https://lcsb-biocore.github.io/DistributedData.jl/stable/) [![doc](https://img.shields.io/badge/docs-dev-blue)](https://lcsb-biocore.github.io/DistributedData.jl/dev/) |
 
 Simple distributed data manipulation and processing routines for Julia.
 
 This was originally developed for
-[`GigaSOM.jl`](https://github.com/LCSB-BioCore/GigaSOM.jl); DiDa.jl package
+[`GigaSOM.jl`](https://github.com/LCSB-BioCore/GigaSOM.jl); DistributedData.jl package
 contains the separated-out lightweight distributed-processing framework that
 was used in `GigaSOM.jl`.
 
 ## Why?
 
-DiDa.jl provides a very simple, imperative and straightforward way to move your
+DistributedData.jl provides a very simple, imperative and straightforward way to move your
 data around a cluster of Julia processes created by the
 [`Distributed`](https://docs.julialang.org/en/v1/stdlib/Distributed/) package,
 and run computation on the distributed data pieces. The main aim of the package
@@ -43,14 +43,14 @@ same way, but takes the data back from the worker.
 You can thus send some random array to a few distributed workers:
 
 ```julia
-julia> using Distributed, DiDa
+julia> using Distributed, DistributedData
 
 julia> addprocs(2)
 2-element Array{Int64,1}:
  2
  3
 
-julia> @everywhere using DiDa
+julia> @everywhere using DistributedData
 
 julia> save_at(2, :x, randn(10,10))
 Future(2, 1, 4, nothing)
@@ -132,9 +132,3 @@ julia> gather_array(dataset) # download the data from workers to a sing
  0.610183  1.12165   0.722438
   ⋮
 ```
-
-## What does the name `DiDa` mean?
-
-**Di**stributed **Da**ta.
-
-There is no consensus on how to pronounce the shortcut.
diff --git a/codecov.yml b/codecov.yml
@@ -0,0 +1,6 @@
+coverage:
+  status:
+    project:
+      default:
+        threshold: 15
+    patch: off
diff --git a/docs/make.jl b/docs/make.jl
@@ -1,10 +1,10 @@
-using Documenter, DiDa
+using Documenter, DistributedData
 
-makedocs(modules = [DiDa],
+makedocs(modules = [DistributedData],
     clean = false,
     format = Documenter.HTML(prettyurls = !("local" in ARGS)),
-    sitename = "DiDa.jl",
-    authors = "The developers of DiDa.jl",
+    sitename = "DistributedData.jl",
+    authors = "The developers of DistributedData.jl",
     linkcheck = !("skiplinks" in ARGS),
     pages = [
         "Documentation" => "index.md",
@@ -14,7 +14,7 @@ makedocs(modules = [DiDa],
 )
 
 deploydocs(
-    repo = "github.com/LCSB-BioCore/DiDa.jl.git",
+    repo = "github.com/LCSB-BioCore/DistributedData.jl.git",
     target = "build",
     branch = "gh-pages",
     devbranch = "develop",
diff --git a/docs/src/functions.md b/docs/src/functions.md
@@ -3,27 +3,27 @@
 ## Data structures
 
 ```@autodocs
-Modules = [DiDa]
+Modules = [DistributedData]
 Pages = ["structs.jl"]
 ```
 
 ## Base functions
 
 ```@autodocs
-Modules = [DiDa]
+Modules = [DistributedData]
 Pages = ["base.jl"]
 ```
 
 ## Higher-level array operations
 
 ```@autodocs
-Modules = [DiDa]
+Modules = [DistributedData]
 Pages = ["tools.jl"]
 ```
 
 ## Input/Output
 
 ```@autodocs
-Modules = [DiDa]
+Modules = [DistributedData]
 Pages = ["io.jl"]
 ```
diff --git a/docs/src/index.md b/docs/src/index.md
@@ -1,5 +1,5 @@
 
-# DiDa.jl — simple work with distributed data
+# DistributedData.jl — simple work with distributed data
 
 This package provides simple Distributed Data manipulation and processing
 routines for Julia.
diff --git a/docs/src/tutorial.md b/docs/src/tutorial.md
@@ -1,26 +1,26 @@
 
-# DiDa tutorial
+# DistributedData tutorial
 
-The primary purpose of this tutorial is to get a basic grasp of the main `DiDa`
+The primary purpose of this tutorial is to get a basic grasp of the main `DistributedData`
 functions and methodology.
 
 For starting up, let's create a few distributed workers and import the package:
 
 ```julia
-julia> using Distributed, DiDa
+julia> using Distributed, DistributedData
 
 julia> addprocs(3)
 3-element Array{Int64,1}:
  2
  3
  4
 
-julia> @everywhere using DiDa
+julia> @everywhere using DistributedData
 ```
 
 ## Moving the data around
 
-In `DiDa`, the storage of distributed data is done in the "native" Julia way --
+In `DistributedData`, the storage of distributed data is done in the "native" Julia way --
 the data is stored in normal named variables. Each node holds its own data in
 an arbitrary set of variables as "plain data"; content of these variables is
 completely independent among nodes.
@@ -52,7 +52,7 @@ UndefVarError: x not defined
 …
 ```
 
-`DiDa` uses *quoting* to allow you to precisely specify the parts of the code
+`DistributedData` uses *quoting* to allow you to precisely specify the parts of the code
 that should be evaluated on the "main" Julia process (the one you interact
 with), and the code that should be evaluated on the remote workers.  Basically,
 all quoted code is going to get to the workers without any evaluation; all
@@ -192,7 +192,7 @@ beneficial for implementing advanced parallel algorithms.
 
 Remembering and managing the remote variable names and worker numbers is
 extremely impractical, especially if you need to maintain multiple variables on
-various subsets of all available workers at once. `DiDa` defines a small
+various subsets of all available workers at once. `DistributedData` defines a small
 [`Dinfo`](@ref) data structure that keeps that information for you. Many other
 functions are able to work with `Dinfo` transparently, instead of the "raw"
 symbols and worker lists.
@@ -354,7 +354,7 @@ julia> dmap(Vector(1:length(workers())),
 
 ## Persisting the data
 
-`DiDa` provides support for storing the loaded dataset in each worker's local
+`DistributedData` provides support for storing the loaded dataset in each worker's local
 storage. This is quite beneficial for saving sub-results and various artifacts
 of the computation process for later use, without unnecessarily wasting
 main memory.
@@ -378,9 +378,9 @@ significant overhead.
 
 ## Miscellaneous functions
 
-For convenience, `DiDa` also contains simple implementations of various common
+For convenience, `DistributedData` also contains simple implementations of various common
 utility operations for processing matrix data. These originated in
-flow-cytometry use-cases (which is what `DiDa` was originally built for), but
+flow-cytometry use-cases (which is what `DistributedData` was originally built for), but
 are applicable in many other areas of data analysis:
 
 - [`dselect`](@ref) reduces a matrix to several selected columns (in a
diff --git a/src/DistributedData.jl b/src/DistributedData.jl
@@ -1,4 +1,4 @@
-module DiDa
+module DistributedData
 
 using Distributed
 using Serialization
diff --git a/test/base.jl b/test/base.jl
@@ -14,7 +14,7 @@
     end
 
     addprocs(3)
-    @everywhere using DiDa
+    @everywhere using DistributedData
     W = workers()
 
     @testset "Distributed data transfers -- with workers" begin
@@ -86,16 +86,16 @@
     end
 
     @testset "Internal utilities" begin
-        @test DiDa.tmp_symbol(:test) != :test
-        @test DiDa.tmp_symbol(:test, prefix = "abc",
+        @test DistributedData.tmp_symbol(:test) != :test
+        @test DistributedData.tmp_symbol(:test, prefix = "abc",
                                      suffix = "def") == :abctestdef
-        @test DiDa.tmp_symbol(Dinfo(:test, W)) != :test
+        @test DistributedData.tmp_symbol(Dinfo(:test, W)) != :test
     end
 
     @testset "Persistent distributed data" begin
         di = dtransform(:(), x -> rand(5), W, :test)
 
-        files = DiDa.defaultFiles(di.val, di.workers)
+        files = DistributedData.defaultFiles(di.val, di.workers)
         @test allunique(files)
 
         orig = gather_array(di)
diff --git a/test/runtests.jl b/test/runtests.jl
@@ -1,8 +1,8 @@
 
 using Test
-using DiDa, Distributed, Random
+using DistributedData, Distributed, Random
 
-@testset "DiDa tests" begin
+@testset "DistributedData tests" begin
     include("base.jl")
     include("tools.jl")
 end
diff --git a/test/tools.jl b/test/tools.jl
@@ -2,7 +2,7 @@
 @testset "High-level tools" begin
 
     W = addprocs(2)
-    @everywhere using DiDa
+    @everywhere using DistributedData
 
     Random.seed!(1)
     dd = rand(11111, 5)

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-module DiDa`
	`1`	`+module DistributedData`
`2`	`2`
`3`	`3`	`using Distributed`
`4`	`4`	`using Serialization`