Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Minimum bookable free mcp #1306

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions cuebot/src/main/java/com/imageworks/spcue/dao/CommentDao.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
import com.imageworks.spcue.HostInterface;
import com.imageworks.spcue.JobInterface;

import java.util.List;

public interface CommentDao {

/**
Expand All @@ -32,6 +34,26 @@ public interface CommentDao {
*/
public void deleteComment(String id);

/**
 * Deletes the comments on the given host that match both the given user
 * and the given subject.
 *
 * @param host the host whose comments are considered for deletion
 * @param user the comment user (author) to match
 * @param subject the comment subject to match
 * @return true if one or more comments were deleted, false otherwise
 */
public boolean deleteCommentByHostUserAndSubject(HostInterface host, String user, String subject);

/**
 * Gets the comments on the given host that match both the given user
 * and the given subject.
 *
 * @param host the host whose comments are searched
 * @param user the comment user (author) to match
 * @param subject the comment subject to match
 * @return the matching comments as a {@code List<CommentDetail>}
 */
public List<CommentDetail> getCommentsByHostUserAndSubject(HostInterface host, String user, String subject);

/**
* Retrieves the specified comment.
*
Expand Down
8 changes: 8 additions & 0 deletions cuebot/src/main/java/com/imageworks/spcue/dao/HostDao.java
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,14 @@ public interface HostDao {
*/
void updateHostState(HostInterface host, HardwareState state);

/**
 * Updates a host with the passed amount of free space in its temporary
 * directory (mcp).
 *
 * @param host the host to update
 * @param freeTempDir the free temporary directory space to store for the host
 *        (NOTE(review): presumably in kB, as reported by the host - confirm)
 */
void updateHostFreeTempDir(HostInterface host, Long freeTempDir);

/**
* returns a full host detail
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.List;
import java.util.Map;

import org.springframework.jdbc.core.RowMapper;
Expand Down Expand Up @@ -71,6 +72,18 @@ public CommentDetail mapRow(ResultSet rs, int row) throws SQLException {
}
};

/**
 * Deletes the rows of the comments table whose host key, user and subject
 * all equal the given values.
 *
 * @param host the host whose id is matched against pk_host
 * @param user the value matched against str_user
 * @param subject the value matched against str_subject
 * @return true if the delete statement affected at least one row
 */
public boolean deleteCommentByHostUserAndSubject(HostInterface host, String user, String subject) {
    int deletedRows = getJdbcTemplate().update(
            "DELETE FROM comments WHERE pk_host=? AND str_user=? AND str_subject=?",
            host.getHostId(), user, subject);
    return deletedRows > 0;
}

/**
 * Selects the rows of the comments table whose host key, user and subject
 * all equal the given values, mapping each row with COMMENT_DETAIL_MAPPER.
 *
 * @param host the host whose id is matched against pk_host
 * @param user the value matched against str_user
 * @param subject the value matched against str_subject
 * @return the mapped comments returned by the query
 */
public List<CommentDetail> getCommentsByHostUserAndSubject(HostInterface host, String user, String subject) {
    List<CommentDetail> matchingComments = getJdbcTemplate().query(
            "SELECT * FROM comments WHERE pk_host=? AND str_user=? AND str_subject=?",
            COMMENT_DETAIL_MAPPER, host.getHostId(), user, subject);
    return matchingComments;
}

public CommentDetail getCommentDetail(String id) {
return getJdbcTemplate().queryForObject(
"SELECT * FROM comments WHERE pk_comment=?",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -523,6 +523,13 @@ public void updateHostState(HostInterface host, HardwareState state) {
state.toString(), host.getHostId());
}

/**
 * Writes the given free temporary directory value into the int_mcp_free
 * column of the host_stat row belonging to the given host.
 *
 * @param host the host whose id selects the host_stat row
 * @param freeTempDir the value stored in int_mcp_free
 */
@Override
public void updateHostFreeTempDir(HostInterface host, Long freeTempDir) {
    final String updateFreeMcpSql = "UPDATE host_stat SET int_mcp_free=? WHERE pk_host=?";
    getJdbcTemplate().update(updateFreeMcpSql, freeTempDir, host.getHostId());
}

@Override
public void updateHostSetAllocation(HostInterface host, AllocationInterface alloc) {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,13 @@

import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.LogManager;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.core.env.Environment;
import org.springframework.core.task.TaskRejectedException;
import org.springframework.dao.DataAccessException;
import org.springframework.dao.EmptyResultDataAccessException;

import com.imageworks.spcue.CommentDetail;
import com.imageworks.spcue.DispatchHost;
import com.imageworks.spcue.FrameInterface;
import com.imageworks.spcue.JobEntity;
Expand All @@ -57,6 +60,7 @@
import com.imageworks.spcue.rqd.RqdClient;
import com.imageworks.spcue.rqd.RqdClientException;
import com.imageworks.spcue.service.BookingManager;
import com.imageworks.spcue.service.CommentManager;
import com.imageworks.spcue.service.HostManager;
import com.imageworks.spcue.service.JobManager;
import com.imageworks.spcue.service.JobManagerSupport;
Expand All @@ -80,6 +84,14 @@ public class HostReportHandler {
private JobManagerSupport jobManagerSupport;
private JobDao jobDao;
private LayerDao layerDao;
@Autowired
private Environment env;
@Autowired
private CommentManager commentManager;
// Comment constants
private static final String SUBJECT_COMMENT_FULL_TEMP_DIR = "Host set to REPAIR for not having enough storage " +
"space on the temporary directory (mcp)";
private static final String CUEBOT_COMMENT_USER = "cuebot";

/**
* Boolean to toggle if this class is accepting data or not.
Expand Down Expand Up @@ -156,7 +168,7 @@ public void handleHostReport(HostReport report, boolean isBoot) {
rhost.getLoad(), new Timestamp(rhost.getBootTime() * 1000l),
rhost.getAttributesMap().get("SP_OS"));

changeHardwareState(host, report.getHost().getState(), isBoot);
changeHardwareState(host, report.getHost().getState(), isBoot, report.getHost().getFreeMcp());
changeNimbyState(host, report.getHost());

/**
Expand Down Expand Up @@ -221,7 +233,14 @@ public void handleHostReport(HostReport report, boolean isBoot) {
}
}

if (host.idleCores < Dispatcher.CORE_POINTS_RESERVED_MIN) {
// The minimum amount of free space in the temporary directory to book a host
Long minBookableFreeTempDir = env.getRequiredProperty("dispatcher.min_bookable_free_temp_dir_kb", Long.class);

if (minBookableFreeTempDir != -1 && report.getHost().getFreeMcp() < minBookableFreeTempDir) {
msg = String.format("%s doens't have enough free space in the temporary directory (mcp), %dMB needs %dMB",
host.name, (report.getHost().getFreeMcp()/1024), (minBookableFreeTempDir/1024));
}
else if (host.idleCores < Dispatcher.CORE_POINTS_RESERVED_MIN) {
msg = String.format("%s doesn't have enough idle cores, %d needs %d",
host.name, host.idleCores, Dispatcher.CORE_POINTS_RESERVED_MIN);
}
Expand All @@ -231,7 +250,7 @@ else if (host.idleMemory < Dispatcher.MEM_RESERVED_MIN) {
}
else if (report.getHost().getFreeMem() < CueUtil.MB512) {
msg = String.format("%s doens't have enough free system mem, %d needs %d",
host.name, report.getHost().getFreeMem(), Dispatcher.MEM_RESERVED_MIN);
host.name, report.getHost().getFreeMem(), Dispatcher.MEM_RESERVED_MIN);
}
else if(!host.hardwareState.equals(HardwareState.UP)) {
msg = host + " is not in the Up state.";
Expand Down Expand Up @@ -309,13 +328,61 @@ else if (!dispatchSupport.isCueBookable(host)) {
* updated with a boot report. If the state is Repair, then state is
* never updated via RQD.
*
*
* Prevent cue frames from booking on hosts with full temporary directories.
*
* Change the host state to REPAIR or UP according to the amount of free space
* in the temporary directory:
* - Set the host state to REPAIR, when the amount of free space in the
* temporary directory is less than the minimum required. Add a comment with
* subject: SUBJECT_COMMENT_FULL_TEMP_DIR
* - Set the host state to UP, when the amount of free space in the temporary directory
* is greater than or equal to the minimum required and the host has a comment with
* subject: SUBJECT_COMMENT_FULL_TEMP_DIR
*
* @param host
* @param reportState
* @param isBoot
* @param freeTempDir
*/
private void changeHardwareState(DispatchHost host,
HardwareState reportState, boolean isBoot) {
private void changeHardwareState(DispatchHost host, HardwareState reportState, boolean isBoot, long freeTempDir) {

// The minimum amount of free space in the temporary directory to book a host
Long minBookableFreeTempDir = env.getRequiredProperty("dispatcher.min_bookable_free_temp_dir_kb", Long.class);

// Prevent cue frames from booking on hosts with full temporary directories
if (minBookableFreeTempDir != -1) {
if (host.hardwareState == HardwareState.UP && freeTempDir < minBookableFreeTempDir) {

// Insert a comment indicating that the Host status = Repair with reason = Full temporary directory
CommentDetail c = new CommentDetail();
c.subject = SUBJECT_COMMENT_FULL_TEMP_DIR;
c.user = CUEBOT_COMMENT_USER;
c.timestamp = null;
c.message = "Host " + host.getName() + " marked as REPAIR. The current amount of free space in the " +
"temporary directory (mcp) is " + (freeTempDir/1024) + "MB. It must have at least "
+ (minBookableFreeTempDir/1024) + "MB of free space in temporary directory";
commentManager.addComment(host, c);

// Set the host state to REPAIR
hostManager.setHostState(host, HardwareState.REPAIR);
host.hardwareState = HardwareState.REPAIR;

return;
} else if (host.hardwareState == HardwareState.REPAIR && freeTempDir >= minBookableFreeTempDir) {
// Check if the host with REPAIR status has comments with subject=SUBJECT_COMMENT_FULL_TEMP_DIR and
// user=CUEBOT_COMMENT_USER and delete the comments, if they exist
boolean commentsDeleted = commentManager.deleteCommentByHostUserAndSubject(host,
CUEBOT_COMMENT_USER, SUBJECT_COMMENT_FULL_TEMP_DIR);

if (commentsDeleted) {
// Set the host state to UP
hostManager.setHostState(host, HardwareState.UP);
host.hardwareState = HardwareState.UP;
return;
}
}
}

// If the states are the same there is no reason to do this update.
if (host.hardwareState.equals(reportState)) {
Expand Down Expand Up @@ -374,7 +441,7 @@ private void changeNimbyState(DispatchHost host, RenderHost rh) {
* locked if all cores are locked.
*
* @param host DispatchHost
* @param renderHost RenderHost
* @param coreInfo CoreDetail
*/
private void changeLockState(DispatchHost host, CoreDetail coreInfo) {
if (host.lockState == LockState.LOCKED) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
import com.imageworks.spcue.HostInterface;
import com.imageworks.spcue.JobInterface;

import java.util.List;

public interface CommentManager {

/**
Expand All @@ -47,6 +49,26 @@ public interface CommentManager {
*/
public void deleteComment(String id);

/**
 * Deletes the comments on the given host that match both the given user
 * and the given subject.
 *
 * @param host the host whose comments are considered for deletion
 * @param user the comment user (author) to match
 * @param subject the comment subject to match
 * @return true if one or more comments were deleted, false otherwise
 */
public boolean deleteCommentByHostUserAndSubject(HostInterface host, String user, String subject);

/**
 * Gets the comments on the given host that match both the given user
 * and the given subject.
 *
 * @param host the host whose comments are searched
 * @param user the comment user (author) to match
 * @param subject the comment subject to match
 * @return the matching comments as a {@code List<CommentDetail>}
 */
public List<CommentDetail> getCommentsByHostUserAndSubject(HostInterface host, String user, String subject);

/**
*
* @param id
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@
import com.imageworks.spcue.ShowEntity;
import com.imageworks.spcue.dao.CommentDao;

import java.util.List;

@Transactional
public class CommentManagerService implements CommentManager {

Expand Down Expand Up @@ -55,6 +57,16 @@ public void deleteComment(String id) {
commentDao.deleteComment(id);
}

/**
 * Delegates to the comment DAO to delete the comments on the given host
 * that match the given user and subject.
 *
 * @param host the host whose comments are considered for deletion
 * @param user the comment user to match
 * @param subject the comment subject to match
 * @return the DAO's result: true if one or more comments were deleted
 */
@Transactional(propagation = Propagation.REQUIRED)
public boolean deleteCommentByHostUserAndSubject(HostInterface host, String user, String subject) {
    boolean anyDeleted = commentDao.deleteCommentByHostUserAndSubject(host, user, subject);
    return anyDeleted;
}

/**
 * Delegates to the comment DAO to fetch the comments on the given host
 * that match the given user and subject.
 *
 * @param host the host whose comments are searched
 * @param user the comment user to match
 * @param subject the comment subject to match
 * @return the list of matching comments returned by the DAO
 */
@Transactional(propagation = Propagation.REQUIRED)
public List<CommentDetail> getCommentsByHostUserAndSubject(HostInterface host, String user, String subject) {
    List<CommentDetail> matchingComments = commentDao.getCommentsByHostUserAndSubject(host, user, subject);
    return matchingComments;
}

@Transactional(propagation = Propagation.REQUIRED)
public void setCommentSubject(String id, String subject) {
commentDao.updateCommentSubject(id, subject);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,14 @@ public interface HostManager {
*/
void setHostState(HostInterface host, HardwareState state);

/**
 * Updates the amount of free space in the temporary directory (mcp)
 * recorded for a host.
 *
 * @param host the host to update
 * @param freeTempDir the free temporary directory space to record for the host
 *        (NOTE(review): presumably in kB, as reported by the host - confirm)
 */
void setHostFreeTempDir(HostInterface host, Long freeTempDir);

/**
* Return true if the host is swapping hard enough
* that killing frames will save the entire machine.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,11 @@ public void setHostState(HostInterface host, HardwareState state) {
hostDao.updateHostState(host, state);
}

/**
 * Records the free temporary directory (mcp) value for a host by delegating
 * to the host DAO.
 *
 * @param host the host to update
 * @param freeTempDir the free temporary directory value passed through to the DAO
 */
@Override
public void setHostFreeTempDir(HostInterface host, Long freeTempDir) {
hostDao.updateHostFreeTempDir(host, freeTempDir);
}

@Override
@Transactional(propagation = Propagation.REQUIRED, readOnly=true)
public boolean isSwapping(HostInterface host) {
Expand Down
6 changes: 6 additions & 0 deletions cuebot/src/main/resources/opencue.properties
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,12 @@ dispatcher.report_queue.max_pool_size=8
# Queue capacity for handling Host Report.
dispatcher.report_queue.queue_capacity=1000

# The minimum amount of free space in the temporary directory (mcp) to book a host.
# E.g: 1G = 1048576 kB => dispatcher.min_bookable_free_temp_dir_kb=1048576
# Default = -1 (deactivated)
# If set to -1, the feature is turned off.
dispatcher.min_bookable_free_temp_dir_kb=-1

# Number of threads to keep in the pool for kill frame operation.
dispatcher.kill_queue.core_pool_size=6
# Maximum number of threads to allow in the pool for kill frame operation.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
import com.imageworks.spcue.service.HostManager;
import com.imageworks.spcue.service.JobLauncher;
import com.imageworks.spcue.service.JobManager;
import com.imageworks.spcue.util.CueUtil;

import static org.assertj.core.api.Assertions.assertThat;
import static org.junit.Assert.assertEquals;
Expand Down Expand Up @@ -209,11 +210,12 @@ private void launchJobs() {
private RenderHost.Builder buildRenderHost() {
return RenderHost.newBuilder()
.setBootTime(1192369572)
.setFreeMcp(76020)
// The minimum amount of free space in the temporary directory to book a host.
.setFreeMcp(CueUtil.GB)
.setFreeMem(53500)
.setFreeSwap(20760)
.setLoad(1)
.setTotalMcp(195430)
.setTotalMcp(CueUtil.GB4)
.setTotalMem(8173264)
.setTotalSwap(20960)
.setNimbyEnabled(false)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,11 +96,12 @@ public DispatchHost createHost() {
RenderHost host = RenderHost.newBuilder()
.setName("test_host")
.setBootTime(1192369572)
.setFreeMcp(76020)
// The minimum amount of free space in the temporary directory to book a host.
.setFreeMcp(CueUtil.GB)
.setFreeMem(53500)
.setFreeSwap(20760)
.setLoad(1)
.setTotalMcp(195430)
.setTotalMcp(CueUtil.GB4)
.setTotalMem((int) CueUtil.GB16)
.setTotalSwap((int) CueUtil.GB16)
.setNimbyEnabled(false)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -140,11 +140,12 @@ public void testInsertCommentOnHost() {
RenderHost host = RenderHost.newBuilder()
.setName("boo")
.setBootTime(1192369572)
.setFreeMcp(76020)
// The minimum amount of free space in the temporary directory to book a host.
.setFreeMcp(CueUtil.GB)
.setFreeMem(15290520)
.setFreeSwap(2076)
.setLoad(1)
.setTotalMcp(19543)
.setTotalMcp(CueUtil.GB4)
.setTotalMem(15290520)
.setTotalSwap(2096)
.setNimbyEnabled(false)
Expand Down
Loading
Loading