SCons: Improve cache purging logic

This commit is contained in:
Thaddeus Crews 2024-10-13 13:59:33 -05:00
parent 92e51fca72
commit c1e70e6cf0
No known key found for this signature in database
GPG key ID: 62181B86FE9E5D84
3 changed files with 97 additions and 139 deletions

View file

@ -29,7 +29,6 @@ runs:
# 4. A partial match for the same base branch only (not ideal, matches any PR with the same base branch).
restore-keys: |
${{ inputs.cache-name }}-${{ env.GODOT_BASE_BRANCH }}-${{ github.ref }}-${{ github.sha }}
${{ inputs.cache-name }}-${{ env.GODOT_BASE_BRANCH }}-${{ github.ref }}
${{ inputs.cache-name }}-${{ env.GODOT_BASE_BRANCH }}-refs/heads/${{ env.GODOT_BASE_BRANCH }}
${{ inputs.cache-name }}-${{ env.GODOT_BASE_BRANCH }}

View file

@ -1048,10 +1048,19 @@ GLSL_BUILDERS = {
}
env.Append(BUILDERS=GLSL_BUILDERS)
scons_cache_path = os.environ.get("SCONS_CACHE")
if scons_cache_path is not None:
CacheDir(scons_cache_path)
print("Scons cache enabled... (path: '" + scons_cache_path + "')")
env["SCONS_CACHE"] = os.environ.get("SCONS_CACHE", "")
env["SCONS_CACHE_LIMIT"] = int(os.getenv("SCONS_CACHE_LIMIT", 1024)) * 1024 * 1024
if env["SCONS_CACHE"]:
CacheDir(env["SCONS_CACHE"])
print(f'Scons cache enabled... (path: "{env["SCONS_CACHE"]}")')
if env["verbose"]:
print(
"Current cache limit is {} (used: {})".format(
methods.convert_size(env["SCONS_CACHE_LIMIT"]),
methods.convert_size(methods.get_size(env["SCONS_CACHE"])),
)
)
if env["vsproj"]:
env.vs_incs = []
@ -1147,4 +1156,4 @@ def purge_flaky_files():
atexit.register(purge_flaky_files)
methods.clean_cache(env)
atexit.register(methods.clean_cache, env)

View file

@ -1,5 +1,6 @@
import contextlib
import glob
import math
import os
import re
import subprocess
@ -903,160 +904,109 @@ def using_emcc(env):
return "emcc" in os.path.basename(env["CC"])
def convert_size(size_bytes: int) -> str:
    """Return a human-readable string for *size_bytes* using binary (1024-based) units.

    Examples: ``0 -> "0 bytes"``, ``512 -> "512 bytes"``, ``1536 -> "1.5 KB"``.
    Assumes a non-negative size (file sizes from ``os.stat``); ``math.log``
    would raise ``ValueError`` for negative input.
    """
    if size_bytes == 0:
        return "0 bytes"
    SIZE_NAMES = ["bytes", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"]
    index = int(math.floor(math.log(size_bytes, 1024)))
    size = round(size_bytes / math.pow(1024, index), 2)
    # Byte counts are exact integers; don't render a spurious ".0" suffix
    # (keeps the output consistent with the "0 bytes" special case).
    if index == 0:
        return f"{int(size)} {SIZE_NAMES[index]}"
    return f"{size} {SIZE_NAMES[index]}"
def get_size(start_path: str = ".") -> int:
    """Return the combined size in bytes of every file under *start_path*.

    Walks the directory tree recursively; directories themselves contribute
    nothing, only the files they contain.
    """
    return sum(
        os.path.getsize(os.path.join(root, name))
        for root, _dirs, names in os.walk(start_path)
        for name in names
    )
def show_progress(env):
if env["ninja"]:
# Has its own progress/tracking tool that clashes with ours
# Progress reporting is not available in non-TTY environments since it messes with the output
# (for example, when writing to a file). Ninja has its own progress/tracking tool that clashes
# with ours.
if not env["progress"] or not sys.stdout.isatty() or env["ninja"]:
return
import sys
NODE_COUNT_FILENAME = f"{base_folder_path}.scons_node_count"
from SCons.Script import AlwaysBuild, Command, Progress
screen = sys.stdout
# Progress reporting is not available in non-TTY environments since it
# messes with the output (for example, when writing to a file)
show_progress = env["progress"] and sys.stdout.isatty()
node_count = 0
node_count_max = 0
node_count_interval = 1
node_count_fname = str(env.Dir("#")) + "/.scons_node_count"
import math
class cache_progress:
# The default is 1 GB cache
def __init__(self, path=None, limit=pow(1024, 3)):
self.path = path
self.limit = limit
if env["verbose"] and path is not None:
screen.write(
"Current cache limit is {} (used: {})\n".format(
self.convert_size(limit), self.convert_size(self.get_size(path))
)
)
class ShowProgress:
def __init__(self):
self.count = 0
self.max = 0
try:
with open(NODE_COUNT_FILENAME, "r", encoding="utf-8") as f:
self.max = int(f.readline())
except OSError:
pass
if self.max == 0:
print("NOTE: Performing initial build, progress percentage unavailable!")
def __call__(self, node, *args, **kw):
nonlocal node_count, node_count_max, node_count_interval, node_count_fname, show_progress
if show_progress:
# Print the progress percentage
node_count += node_count_interval
if node_count_max > 0 and node_count <= node_count_max:
screen.write("\r[%3d%%] " % (node_count * 100 / node_count_max))
screen.flush()
elif node_count_max > 0 and node_count > node_count_max:
screen.write("\r[100%] ")
screen.flush()
else:
screen.write("\r[Initial build] ")
screen.flush()
self.count += 1
if self.max != 0:
percent = int(min(self.count * 100 / self.max, 100))
sys.stdout.write(f"\r[{percent:3d}%] ")
sys.stdout.flush()
def convert_size(self, size_bytes):
if size_bytes == 0:
return "0 bytes"
size_name = ("bytes", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB")
i = int(math.floor(math.log(size_bytes, 1024)))
p = math.pow(1024, i)
s = round(size_bytes / p, 2)
return "%s %s" % (int(s) if i == 0 else s, size_name[i])
from SCons.Script import Progress
def get_size(self, start_path="."):
total_size = 0
for dirpath, dirnames, filenames in os.walk(start_path):
for f in filenames:
fp = os.path.join(dirpath, f)
total_size += os.path.getsize(fp)
return total_size
progressor = ShowProgress()
Progress(progressor)
def progress_finish(target, source, env):
nonlocal node_count, progressor
try:
with open(node_count_fname, "w", encoding="utf-8", newline="\n") as f:
f.write("%d\n" % node_count)
except Exception:
with open(NODE_COUNT_FILENAME, "w", encoding="utf-8", newline="\n") as f:
f.write(f"{progressor.count}\n")
except OSError:
pass
try:
with open(node_count_fname, "r", encoding="utf-8") as f:
node_count_max = int(f.readline())
except Exception:
pass
cache_directory = os.environ.get("SCONS_CACHE")
# Simple cache pruning, attached to SCons' progress callback. Trim the
# cache directory to a size not larger than cache_limit.
cache_limit = float(os.getenv("SCONS_CACHE_LIMIT", 1024)) * 1024 * 1024
progressor = cache_progress(cache_directory, cache_limit)
Progress(progressor, interval=node_count_interval)
progress_finish_command = Command("progress_finish", [], progress_finish)
AlwaysBuild(progress_finish_command)
env.AlwaysBuild(
env.CommandNoCache(
"progress_finish", [], env.Action(progress_finish, "Building node count database .scons_node_count")
)
)
def clean_cache(env):
import atexit
import time
if env.GetOption("clean") or not env["SCONS_CACHE"]:
return
class cache_clean:
def __init__(self, path=None, limit=pow(1024, 3)):
self.path = path
self.limit = limit
files = glob.glob(os.path.join(env["SCONS_CACHE"], "*", "*"))
if not files:
return
def clean(self):
self.delete(self.file_list())
def delete(self, files):
if len(files) == 0:
return
if env["verbose"]:
# Utter something
print("Purging %d %s from cache..." % (len(files), "files" if len(files) > 1 else "file"))
[os.remove(f) for f in files]
def file_list(self):
if self.path is None:
# Nothing to do
return []
# Gather a list of (filename, (size, atime)) within the
# cache directory
file_stat = [(x, os.stat(x)[6:8]) for x in glob.glob(os.path.join(self.path, "*", "*"))]
if file_stat == []:
# Nothing to do
return []
# Weight the cache files by size (assumed to be roughly
# proportional to the recompilation time) times an exponential
# decay since the ctime, and return a list with the entries
# (filename, size, weight).
current_time = time.time()
file_stat = [(x[0], x[1][0], (current_time - x[1][1])) for x in file_stat]
# Sort by the most recently accessed files (most sensible to keep) first
file_stat.sort(key=lambda x: x[2])
# Search for the first entry where the storage limit is
# reached
sum, mark = 0, None
for i, x in enumerate(file_stat):
sum += x[1]
if sum > self.limit:
mark = i
break
if mark is None:
return []
else:
return [x[0] for x in file_stat[mark:]]
def cache_finally():
nonlocal cleaner
# Remove all text files, store binary files in list of (filename, size, atime).
purge = []
stats = []
for file in files:
# A bit of an antipattern, but failing a utf-8 decode is the easiest way to determine if
# a file is binary.
try:
cleaner.clean()
except Exception:
pass
with open(file, encoding="utf-8") as out:
out.read(100)
purge.append(file)
except UnicodeDecodeError:
stats.append((file, *os.stat(file)[6:8]))
cache_directory = os.environ.get("SCONS_CACHE")
# Simple cache pruning, attached to SCons' progress callback. Trim the
# cache directory to a size not larger than cache_limit.
cache_limit = float(os.getenv("SCONS_CACHE_LIMIT", 1024)) * 1024 * 1024
cleaner = cache_clean(cache_directory, cache_limit)
# Sort by most recent access (most sensible to keep) first. Search for the first entry where
# the cache limit is reached.
stats.sort(key=lambda x: x[2], reverse=True)
cache_limit = env["SCONS_CACHE_LIMIT"]
sum = 0
for index, stat in enumerate(stats):
sum += stat[1]
if sum > cache_limit:
purge.extend([x[0] for x in stats[index:]])
break
atexit.register(cache_finally)
if purge:
if env["verbose"]:
print("Purging %d %s from cache..." % (len(purge), "files" if len(purge) > 1 else "file"))
for file in purge:
os.remove(file)
def dump(env):