Commit

Minor improvements
fabriziosalmi committed Feb 1, 2025
1 parent 00db43b commit 00cfbb7
Showing 6 changed files with 56 additions and 17 deletions.
4 changes: 3 additions & 1 deletion lxc_autoscale/config.py
@@ -5,6 +5,7 @@
 from typing import Any, Dict, List, Set, Union
 
 import yaml
+from logging_setup import setup_logging  # Import the logging setup function
 
 CONFIG_FILE = "/etc/lxc_autoscale/lxc_autoscale.yaml"
 
@@ -24,7 +25,8 @@ def load_tier_configurations() -> Dict[str, Dict[str, Any]]:
     """Load and validate tier configurations."""
     tier_configs: Dict[str, Dict[str, Any]] = {}
 
-    logging.basicConfig(level=logging.INFO)
+    # Setup logging based on the configuration
+    setup_logging()
 
     for section, values in config.items():
         if section.startswith('TIER_'):
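
Centralizing configuration in setup_logging() sidesteps a classic pitfall of per-module logging.basicConfig() calls: basicConfig is a no-op once the root logger already has handlers, so whichever module runs first silently wins. A minimal illustration of that behaviour:

    import logging

    logging.basicConfig(level=logging.INFO)   # first call configures the root logger
    logging.basicConfig(level=logging.DEBUG)  # ignored: root already has a handler

    logging.debug("not shown - the effective level is still INFO")
    logging.info("shown")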
3 changes: 3 additions & 0 deletions lxc_autoscale/lock_manager.py
@@ -23,14 +23,17 @@ def acquire_lock():
     # Open the lock file for writing
     lock_file = open(LOCK_FILE, 'w')
     try:
+        logging.info("Acquiring lock on %s", LOCK_FILE)
         # Try to acquire an exclusive lock on the file (non-blocking)
         fcntl.lockf(lock_file, fcntl.LOCK_EX | fcntl.LOCK_NB)
+        logging.info("Lock acquired on %s", LOCK_FILE)
         # Yield control back to the calling context, keeping the lock in place
         yield lock_file
     except IOError:
         # If the lock is already held by another process, log an error and exit
         logging.error("Another instance of the script is already running. Exiting to avoid overlap.")
         sys.exit(1)
     finally:
+        logging.info("Lock released on %s", LOCK_FILE)
         # Ensure the lock file is closed when done, releasing the lock
         lock_file.close()
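
The context manager is consumed with a plain `with` block; closing the file descriptor is what actually releases the fcntl lock. A usage sketch (the body is hypothetical):

    from lock_manager import acquire_lock

    with acquire_lock() as lock_file:
        run_scaling_cycle()  # hypothetical: work done while the lock is held
    # leaving the block closes the file, which drops the fcntl lock

One caveat: the finally clause also runs on the sys.exit(1) path, so the new "Lock released" message is logged even when the lock was never acquired.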
7 changes: 7 additions & 0 deletions lxc_autoscale/logging_setup.py
@@ -20,6 +20,13 @@ def setup_logging():
         datefmt='%Y-%m-%d %H:%M:%S'  # Date format for timestamps
     )
 
+    # Debug: Verify that the log file is writable
+    try:
+        with open(LOG_FILE, 'a') as f:
+            f.write("# Log file initialized successfully.\n")
+    except Exception as err:
+        print(f"Error writing to log file ({LOG_FILE}): {err}")
+
     # Create a console handler to output log messages to the console
     console = logging.StreamHandler()
     console.setLevel(logging.INFO)  # Set the logging level for the console output
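
The writability probe appends a marker line to the log itself, so every start-up leaves a stray "# Log file initialized successfully." line interleaved with real records. A quieter variant, sketched here under the same LOG_FILE assumption, opens the file for append without writing to it:

    def log_file_writable(path: str) -> bool:
        """Return True if `path` can be opened for appending."""
        try:
            with open(path, 'a'):
                return True
        except OSError:
            return False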
12 changes: 7 additions & 5 deletions lxc_autoscale/lxc_autoscale.py
@@ -47,18 +47,20 @@ def parse_arguments() -> argparse.Namespace:
         help="Rollback to previous container configurations"  # Option to revert containers to their backed-up settings
     )
 
-    return parser.parse_args()
+    args = parser.parse_args()
+    logging.debug(f"Parsed arguments: {args}")
+    return args
 
 
 # Entry point of the script
 if __name__ == "__main__":
-    # Setup logging based on the configuration
-    setup_logging()
-
     # Parse command-line arguments
     args: argparse.Namespace = parse_arguments()
 
+    # Remove automatic creation of log directory and file
+
+    # Setup logging based on the configuration
+    setup_logging()
+    logging.info("Starting LXC autoscaling daemon with arguments: %s", args)
 
     # Acquire a lock to ensure that only one instance of the script runs at a time
     with acquire_lock() as lock_file:
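
Note the ordering this hunk creates: parse_arguments() now emits its logging.debug() call before setup_logging() has run. At that point the root logger still has its default WARNING level and no handlers, so the parsed-arguments record is silently dropped. The effect is easy to reproduce:

    import logging

    logging.debug("dropped: root logger still defaults to WARNING, no handlers")
    logging.basicConfig(level=logging.DEBUG)
    logging.debug("shown: level and handlers are configured now")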
33 changes: 25 additions & 8 deletions lxc_autoscale/lxc_utils.py
@@ -53,6 +53,7 @@ def close_ssh_client() -> None:
     if ssh_client:
         logging.debug("Closing SSH connection...")
         ssh_client.close()
+        logging.info("SSH connection closed.")
         ssh_client = None
 
 def run_command(cmd: str, timeout: int = 30) -> Optional[str]:
@@ -67,6 +68,7 @@ def run_command(cmd: str, timeout: int = 30) -> Optional[str]:
     """
     use_remote_proxmox = config.get('DEFAULT', {}).get('use_remote_proxmox', False)
     logging.debug("Inside run_command: use_remote_proxmox = %s", use_remote_proxmox)
+    logging.debug(f"Running command: {cmd} (timeout: {timeout}s)")
     return (run_remote_command if use_remote_proxmox else run_local_command)(cmd, timeout)


@@ -84,7 +86,7 @@ def run_local_command(cmd: str, timeout: int = 30) -> Optional[str]:
         result = subprocess.check_output(
             cmd, shell=True, timeout=timeout, stderr=subprocess.STDOUT,
         ).decode('utf-8').strip()
-        logging.debug("Command '%s' executed successfully. Output: %s", cmd, result)
+        logging.debug(f"Command '{cmd}' executed successfully. Output: {result}")
         return result
     except subprocess.TimeoutExpired:
         logging.error("Command '%s' timed out after %d seconds", cmd, timeout)
@@ -112,7 +114,7 @@ def run_remote_command(cmd: str, timeout: int = 30) -> Optional[str]:
     try:
         _, stdout, _ = ssh.exec_command(cmd, timeout=timeout)
         output = stdout.read().decode('utf-8').strip()
-        logging.debug("Remote command '%s' executed successfully: %s", cmd, output)
+        logging.debug(f"Remote command '{cmd}' executed successfully: {output}")
         return output
     except paramiko.SSHException as e:
         logging.error("SSH execution failed: %s", str(e))
Expand All @@ -138,7 +140,9 @@ def get_containers() -> List[str]:

def is_ignored(ctid: str) -> bool:
"""Check if container should be ignored."""
return str(ctid) in IGNORE_LXC
ignored = str(ctid) in IGNORE_LXC
logging.debug(f"Container {ctid} is ignored: {ignored}")
return ignored

def is_container_running(ctid: str) -> bool:
"""Check if a container is running.
@@ -150,7 +154,9 @@ def is_container_running(ctid: str) -> bool:
         True if the container is running, False otherwise.
     """
     status = run_command(f"pct status {ctid}")
-    return bool(status and "status: running" in status.lower())
+    running = bool(status and "status: running" in status.lower())
+    logging.debug(f"Container {ctid} running status: {running}")
+    return running
 
 
 def backup_container_settings(ctid: str, settings: Dict[str, Any]) -> None:
@@ -226,6 +232,7 @@ def log_json_event(ctid: str, action: str, resource_change: str) -> None:
     with lock:
         with open(LOG_FILE.replace('.log', '.json'), 'a', encoding='utf-8') as json_log_file:
             json_log_file.write(json.dumps(log_data) + '\n')
+    logging.info("Logged event for container %s: %s - %s", ctid, action, resource_change)
 
 
 def get_total_cores() -> int:
@@ -366,7 +373,9 @@ def get_memory_usage(ctid: str) -> float:
     if mem_info:
         try:
             total, used = map(int, mem_info.split())
-            return (used * 100) / total
+            mem_usage = (used * 100) / total
+            logging.info("Memory usage for %s: %.2f%%", ctid, mem_usage)
+            return mem_usage
         except ValueError:
             logging.error("Failed to parse memory info for %s: '%s'", ctid, mem_info)
     logging.error("Failed to get memory usage for %s", ctid)
@@ -406,6 +415,7 @@ def collect_data_for_container(ctid: str) -> Optional[Dict[str, Dict[str, Any]]]:
     """Collect data for a single container."""
     data = get_container_data(ctid)
     if data:
+        logging.debug("Data collected for container %s: %s", ctid, data)
         return {ctid: data}
     return None
 
@@ -434,6 +444,7 @@ def collect_container_data() -> Dict[str, Dict[str, Any]]:
         except Exception as e:
             logging.error(f"Error collecting data for container {ctid}: {e}")
 
+    logging.info("Collected data for containers: %s", containers)
     return containers
 
 
@@ -472,7 +483,9 @@ def get_container_config(ctid: str) -> Dict[str, Any]:
     Returns:
         The container's tier configuration.
     """
-    return LXC_TIER_ASSOCIATIONS.get(ctid, config)
+    tier_config = LXC_TIER_ASSOCIATIONS.get(ctid, config)  # distinct name: rebinding `config` locally would raise UnboundLocalError
+    logging.debug("Configuration for container %s: %s", ctid, tier_config)
+    return tier_config
 
 
def generate_unique_snapshot_name(base_name: str) -> str:
Expand All @@ -484,7 +497,9 @@ def generate_unique_snapshot_name(base_name: str) -> str:
Returns:
A unique snapshot name.
"""
return f"{base_name}-{datetime.now().strftime('%Y%m%d%H%M%S')}"
snapshot_name = f"{base_name}-{datetime.now().strftime('%Y%m%d%H%M%S')}"
logging.debug("Generated unique snapshot name: %s", snapshot_name)
return snapshot_name


def generate_cloned_hostname(base_name: str, clone_number: int) -> str:
@@ -497,7 +512,9 @@ def generate_cloned_hostname(base_name: str, clone_number: int) -> str:
     Returns:
         A unique hostname for the cloned container.
     """
-    return f"{base_name}-cloned-{clone_number}"
+    hostname = f"{base_name}-cloned-{clone_number}"
+    logging.debug("Generated cloned hostname: %s", hostname)
+    return hostname
 
 import atexit
 atexit.register(close_ssh_client)
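
Several of these hunks convert %-style logging calls to f-strings (while others keep %-style). The trade-off: an f-string builds the full message at the call site even when the level is disabled, whereas %-style arguments are only formatted if a handler actually emits the record. A small comparison:

    import logging

    logging.basicConfig(level=logging.INFO)
    data = list(range(100_000))

    logging.debug(f"container data: {data}")   # repr(data) is built, then discarded
    logging.debug("container data: %s", data)  # disabled call skips formatting entirely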
14 changes: 11 additions & 3 deletions lxc_autoscale/scaling_manager.py
@@ -33,6 +33,7 @@ def calculate_increment(current: float, upper_threshold: float, min_increment: i
         Calculated increment value.
     """
     proportional_increment = int((current - upper_threshold) / CPU_SCALE_DIVISOR)
+    logging.debug(f"Calculated increment: {proportional_increment} (current: {current}, upper_threshold: {upper_threshold})")
     return min(max(min_increment, proportional_increment), max_increment)
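
Worked through with assumed numbers (CPU_SCALE_DIVISOR comes from configuration and is not shown in this diff; 10 is used here purely for illustration): at 95% usage against an 80% threshold the proportional step is int(15 / 10) = 1, which the min/max clamp keeps within [min_increment, max_increment]:

    def demo_increment(current, upper, divisor=10, lo=1, hi=4):
        proportional = int((current - upper) / divisor)
        return min(max(lo, proportional), hi)

    assert demo_increment(95, 80) == 1  # int(15/10) = 1, already within [1, 4]
    assert demo_increment(95, 50) == 4  # int(45/10) = 4, clamped by hi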


@@ -50,6 +51,7 @@ def calculate_decrement(current: float, lower_threshold: float, current_allocate
         Calculated decrement value.
     """
     dynamic_decrement = max(1, int((lower_threshold - current) / CPU_SCALE_DIVISOR))
+    logging.debug(f"Calculated decrement: {dynamic_decrement} (current: {current}, lower_threshold: {lower_threshold})")
     return max(min(current_allocated - min_allocated, dynamic_decrement), min_decrement)


@@ -60,10 +62,13 @@ def get_behaviour_multiplier() -> float:
         The behavior multiplier (1.0 for normal, 0.5 for conservative, 2.0 for aggressive).
     """
     if DEFAULTS['behaviour'] == 'conservative':
-        return 0.5
+        multiplier = 0.5
     elif DEFAULTS['behaviour'] == 'aggressive':
-        return 2.0
-    return 1.0
+        multiplier = 2.0
+    else:
+        multiplier = 1.0
+    logging.debug(f"Behavior multiplier set to {multiplier} based on configuration: {DEFAULTS['behaviour']}")
+    return multiplier


def scale_memory(ctid: str, mem_usage: float, mem_upper: float, mem_lower: float, current_memory: int, min_memory: int, available_memory: int, config: Dict[str, Any]) -> Tuple[int, bool]:
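
The multiplier feeds directly into the sizing arithmetic below; with an assumed memory_min_increment of 512 MiB, the minimum step becomes 256 under 'conservative' and 1024 under 'aggressive':

    for behaviour, multiplier in (('conservative', 0.5), ('normal', 1.0), ('aggressive', 2.0)):
        print(behaviour, int(512 * multiplier))  # -> 256, 512, 1024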
@@ -85,6 +90,8 @@ def scale_memory(ctid: str, mem_usage: float, mem_upper: float, mem_lower: float
     memory_changed = False
     behaviour_multiplier = get_behaviour_multiplier()
 
+    logging.info(f"Memory scaling for container {ctid} - Usage: {mem_usage}%, Upper threshold: {mem_upper}%, Lower threshold: {mem_lower}%")
+
     if mem_usage > mem_upper:
         increment = max(
             int(config['memory_min_increment'] * behaviour_multiplier),
@@ -457,4 +464,5 @@ def is_off_peak() -> bool:
         True if it is off-peak, otherwise False.
     """
     current_hour = datetime.now().hour
+    logging.debug(f"Current hour: {current_hour}, Off-peak hours: {DEFAULTS['off_peak_start']} - {DEFAULTS['off_peak_end']}")
     return DEFAULTS['off_peak_start'] <= current_hour or current_hour < DEFAULTS['off_peak_end']
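
The return expression assumes an off-peak window that wraps past midnight (e.g. start 22, end 6); for a same-day window such as 1-5 it would report almost every hour as off-peak. A sketch that handles both cases:

    def in_off_peak_window(hour: int, start: int, end: int) -> bool:
        if start <= end:
            return start <= hour < end       # same-day window
        return hour >= start or hour < end   # window wraps midnight

    assert in_off_peak_window(23, 22, 6) and in_off_peak_window(3, 22, 6)
    assert not in_off_peak_window(12, 22, 6)
    assert in_off_peak_window(3, 1, 5) and not in_off_peak_window(12, 1, 5)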
