-
Notifications
You must be signed in to change notification settings - Fork 360
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
AN-380 Make call cache hashing strategy configurable per filesystem and backend #7683
Changes from 33 commits
e712005
449567c
4a36763
98beda3
7ea5618
2f55cd4
6854497
61eae83
be3a8a3
0c32295
baf8151
6b435d5
c10ae98
065ff05
df9dccc
b82f2ec
0a101df
add5a8c
de33a85
875f9b5
16137f2
1966d28
d0ad499
e30b7c2
05876b3
3a9e180
22cbc1f
83c381d
1458942
c1c9c38
ed662f5
b3f04fd
c03f985
57ed4b4
1f036c1
cc0aba1
8c626c9
2071ecb
46eb4bd
892a5d4
c01fbb1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,9 +1,9 @@ | ||
package cromwell.backend.standard.callcaching | ||
|
||
import java.util.concurrent.TimeoutException | ||
|
||
import akka.actor.{Actor, ActorLogging, ActorRef, Timers} | ||
import akka.event.LoggingAdapter | ||
import com.typesafe.config.Config | ||
import cromwell.backend.standard.StandardCachingActorHelper | ||
import cromwell.backend.standard.callcaching.RootWorkflowFileHashCacheActor.IoHashCommandWithContext | ||
import cromwell.backend.standard.callcaching.StandardFileHashingActor._ | ||
|
@@ -12,8 +12,11 @@ import cromwell.core.JobKey | |
import cromwell.core.callcaching._ | ||
import cromwell.core.io._ | ||
import cromwell.core.logging.JobLogging | ||
import cromwell.core.path.Path | ||
import net.ceedubs.ficus.Ficus._ | ||
import wom.values.WomFile | ||
|
||
import scala.jdk.CollectionConverters._ | ||
import scala.util.{Failure, Success, Try} | ||
|
||
/** | ||
|
@@ -48,6 +51,10 @@ case class FileHashContext(hashKey: HashKey, file: String) | |
class DefaultStandardFileHashingActor(standardParams: StandardFileHashingActorParams) | ||
extends StandardFileHashingActor(standardParams) { | ||
override val ioCommandBuilder: IoCommandBuilder = DefaultIoCommandBuilder | ||
|
||
override val defaultHashingStrategies: Map[String, FileHashStrategy] = Map( | ||
("drs", FileHashStrategy.Drs) | ||
) | ||
} | ||
|
||
object StandardFileHashingActor { | ||
|
@@ -80,10 +87,36 @@ abstract class StandardFileHashingActor(standardParams: StandardFileHashingActor | |
override lazy val serviceRegistryActor: ActorRef = standardParams.serviceRegistryActor | ||
override lazy val configurationDescriptor: BackendConfigurationDescriptor = standardParams.configurationDescriptor | ||
|
||
// Child classes can override to set per-filesystem defaults | ||
val defaultHashingStrategies: Map[String, FileHashStrategy] = Map.empty | ||
|
||
// Hashing strategy to use if none is specified. | ||
val fallbackHashingStrategy: FileHashStrategy = FileHashStrategy.Md5 | ||
|
||
// Combines defaultHashingStrategies with user-provided configuration | ||
lazy val hashingStrategies: Map[String, FileHashStrategy] = { | ||
val configuredHashingStrategies = for { | ||
fsConfigs <- configurationDescriptor.backendConfig.as[Option[Config]]("filesystems").toList | ||
fsKey <- fsConfigs.root.keySet().asScala | ||
configKey = s"${fsKey}.caching.hashing-strategy" | ||
// TODO this allows users to override with an empty list to prevent caching, is that desirable or a footgun? | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There is already a global I don't it makes sense to let users toggle call caching on a per-filesystem basis because one backend can use multiple filesystems in the same task (e.g. GCS + DRS). I would make empty list an exception if There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good point, will change. |
||
fileHashStrategyFromList = Try(fsConfigs.as[List[String]](configKey)).toOption | ||
fileHashStrategyFromString = Try(fsConfigs.as[String](configKey)).toOption.map(List(_)) | ||
fileHashStrategy <- fileHashStrategyFromList.orElse(fileHashStrategyFromString) | ||
} yield (fsKey, FileHashStrategy.of(fileHashStrategy)) | ||
|
||
defaultHashingStrategies ++ configuredHashingStrategies | ||
|
||
} | ||
|
||
protected def ioCommandBuilder: IoCommandBuilder = DefaultIoCommandBuilder | ||
|
||
// Used by ConfigBackend for synchronous hashing of local files | ||
def customHashStrategy(fileRequest: SingleFileHashRequest): Option[Try[String]] = None | ||
|
||
def hashStrategyForPath(p: Path): FileHashStrategy = | ||
hashingStrategies.getOrElse(p.filesystemTypeKey, fallbackHashingStrategy) | ||
|
||
def fileHashingReceive: Receive = { | ||
// Hash Request | ||
case fileRequest: SingleFileHashRequest => | ||
|
@@ -115,8 +148,9 @@ abstract class StandardFileHashingActor(standardParams: StandardFileHashingActor | |
def asyncHashing(fileRequest: SingleFileHashRequest, replyTo: ActorRef): Unit = { | ||
val fileAsString = fileRequest.file.value | ||
val ioHashCommandTry = for { | ||
gcsPath <- getPath(fileAsString) | ||
command <- ioCommandBuilder.hashCommand(gcsPath) | ||
path <- getPath(fileAsString) | ||
hashStrategy = hashStrategyForPath(path) | ||
command <- ioCommandBuilder.hashCommand(path, hashStrategy) | ||
} yield command | ||
lazy val fileHashContext = FileHashContext(fileRequest.hashKey, fileRequest.file.value) | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should this be the list of md5 + identity?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is the generic fallback across all backends and filesystems, since identity is only implemented for GCP I'm afraid that would confuse people.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That makes sense!