jj-split-by-owner

This commit is contained in:
Matthew Ryan Dillon 2025-12-11 15:30:10 -05:00
parent ca5c6a6f03
commit 36e4633ebf

View file

@ -0,0 +1,309 @@
#!/usr/bin/env bash
#
# jj-split-by-owner - Split a jj revision into multiple revisions by CODEOWNERS
#
# Usage: jj-split-by-owner <revision> <description_template>
#
# The description_template should contain %s where you want the team name inserted.
# Example: "feat: update %s components"
# -> "feat: update @frontend-team components"
# -> "feat: update @backend-team components"
#
# Example: "[%s] refactor authentication"
# -> "[@security-team] refactor authentication"
#
set -euo pipefail
#######################################
# Print the command-line help and terminate the script.
# Arguments: none
# Outputs:   the help text on stdout
# Exits:     always, with status 1
#######################################
usage() {
  # Every line is passed as data to a single '%s\n' format, so the literal
  # "%s" inside the help text is printed verbatim rather than interpreted.
  printf '%s\n' \
    "Usage: $(basename "$0") <revision> <description_template>" \
    "Split a jj revision into multiple revisions based on CODEOWNERS." \
    "Arguments:" \
    "revision The jj revision to split (e.g., @, @-, abc123)" \
    "description_template A printf-style template with %s for the team name" \
    "Example: \"feat: update %s components\"" \
    "The script will:" \
    "1. Find all changed files in the revision" \
    "2. Match each file to its owner from CODEOWNERS" \
    "3. Create separate revisions for each unique owner" \
    "4. Each revision gets the template with the owner name substituted"
  exit 1
}
# Both positional arguments are mandatory; anything less prints the help text.
(( $# >= 2 )) || usage
REVISION=$1
TEMPLATE=$2
# Scratch directory holding every intermediate file; the EXIT trap removes it
# on any exit path.
# NOTE(review): TMPDIR is also the conventional name mktemp and other tools
# read from the environment. If the caller exported it, child processes will
# see this scratch path as their temp root - confirm that is acceptable.
TMPDIR="$(mktemp -d)"
trap 'rm -rf "$TMPDIR"' EXIT
#######################################
# Locate the CODEOWNERS file, looking relative to the current directory.
#
# Candidates are probed in the order .github/, repository root, docs/,
# .gitlab/. That ordering agrees with GitHub's documented precedence
# (.github/, root, docs/) and keeps root before docs/ before .gitlab/ for
# GitLab-style layouts, so when several files exist the one the hosting
# platform would honour is chosen.
#
# Arguments: none
# Outputs:   the relative path of the first existing candidate on stdout;
#            an error message on stderr when none exists
# Returns:   0 when a file is found
# Exits:     with status 1 when no candidate exists (callers use this inside
#            a command substitution, so the exit ends that subshell and the
#            failing assignment stops the script under `set -e`)
#######################################
find_codeowners() {
  local loc
  local -a locations=(
    ".github/CODEOWNERS"
    "CODEOWNERS"
    "docs/CODEOWNERS"
    ".gitlab/CODEOWNERS"
  )
  for loc in "${locations[@]}"; do
    if [[ -f "$loc" ]]; then
      printf '%s\n' "$loc"
      return 0
    fi
  done
  echo "Error: CODEOWNERS file not found" >&2
  exit 1
}
# Resolve the CODEOWNERS path once; the parsing step reads it from this global.
CODEOWNERS_FILE="$(find_codeowners)"
printf 'Using CODEOWNERS: %s\n' "$CODEOWNERS_FILE"
#######################################
# List the paths touched by the revision, one per line.
#
# `jj diff --summary` prints a status column followed by the path. Only that
# leading status token and the whitespace after it are removed, so a path
# that itself contains spaces is kept whole (taking awk's second field would
# cut it at the first space).
# NOTE(review): rename/copy entries are passed through exactly as jj prints
# them after the status column; confirm how they should be split if the
# revision contains renames.
#
# Globals:   REVISION (read)
# Arguments: none
# Outputs:   changed paths on stdout
#######################################
get_changed_files() {
  jj diff -r "$REVISION" --summary \
    | awk '{ sub(/^[^[:space:]]+[[:space:]]+/, ""); print }'
}
#######################################
# Convert every CODEOWNERS rule into an anchored extended regular expression.
#
# Globals:   CODEOWNERS_FILE (read)
# Arguments: none
# Outputs:   one "regex<TAB>owner" line per rule on stdout, in file order
#
# Translation of a pattern:
#   - blank lines, comment lines and rules without an owner are skipped;
#     only the first owner on a line is used
#   - "**/" at the start of a path segment matches zero or more directories
#   - any other "**" matches anything, including "/"
#   - "*" matches anything except "/"
#   - . ? + [ ] ^ $ ( ) { } | are matched literally
#   - a leading "/" anchors the rule at the repository root, otherwise the
#     rule may start at any directory boundary
#   - a trailing "/" matches everything below that directory
#   - a rule whose last segment has no wildcard matches that exact path and,
#     if it names a directory, everything below it
#######################################
parse_and_convert_codeowners() {
  awk '
    # Translate one glob into regex text by walking it character by
    # character. Working on the original glob (instead of running several
    # substitutions over partially converted text) means generated regex
    # such as ".*" can never be confused with literal pattern characters.
    function glob_to_regex(glob,    out, i, n, c) {
      out = ""
      n = length(glob)
      i = 1
      while (i <= n) {
        c = substr(glob, i, 1)
        if (c == "*" && substr(glob, i + 1, 1) == "*") {
          if (substr(glob, i + 2, 1) == "/" && (i == 1 || substr(glob, i - 1, 1) == "/")) {
            # "**/" as a whole segment: zero or more directories
            out = out "(.*/)?"
            i += 3
          } else {
            out = out ".*"
            i += 2
          }
        } else if (c == "*") {
          out = out "[^/]*"
          i++
        } else if (index(".?+[]^$(){}|", c) > 0) {
          # Regex metacharacter that must match itself
          out = out "\\" c
          i++
        } else {
          out = out c
          i++
        }
      }
      return out
    }

    # Skip empty lines and comments
    /^[[:space:]]*$/ { next }
    /^[[:space:]]*#/ { next }

    {
      pattern = $1
      owner = $2
      # A rule needs both a pattern and at least one owner
      if (pattern == "" || owner == "") next

      anchored = (substr(pattern, 1, 1) == "/")
      if (anchored) pattern = substr(pattern, 2)
      is_dir = (length(pattern) > 0 && substr(pattern, length(pattern), 1) == "/")

      # Does the final path segment contain a wildcard?
      last_segment = pattern
      sub(/^.*\//, "", last_segment)
      has_wildcard = (index(last_segment, "*") > 0)

      regex = (anchored ? "^" : "(^|/)") glob_to_regex(pattern)

      if (is_dir) {
        # Directory rule: everything underneath
        regex = regex ".*"
      } else if (has_wildcard) {
        # Wildcard file rule: must consume the rest of the path
        regex = regex "$"
      } else {
        # Plain name: the path itself, or anything below it when it is a
        # directory
        regex = regex "(/|$)"
      }

      print regex "\t" owner
    }
  ' "$CODEOWNERS_FILE"
}
# --- Main flow: gather the changed files and the ownership rules ---
printf 'Analyzing revision: %s\n\n' "$REVISION"

# Snapshot the changed paths to a file so later steps can re-read them.
get_changed_files > "$TMPDIR/files.txt"
FILE_COUNT="$(wc -l < "$TMPDIR/files.txt" | tr -d ' ')"
if (( FILE_COUNT == 0 )); then
  # Nothing to split; this is a successful no-op.
  printf 'No changed files in revision %s\n' "$REVISION"
  exit 0
fi
printf 'Found %s changed file(s)\n' "$FILE_COUNT"

# Translate CODEOWNERS into "regex<TAB>owner" lines in a single pass.
printf '%s\n' "Parsing CODEOWNERS..."
parse_and_convert_codeowners > "$TMPDIR/codeowners_regex.txt"
line_num="$(wc -l < "$TMPDIR/codeowners_regex.txt" | tr -d ' ')"
printf 'Loaded %s CODEOWNERS rules\n\n' "$line_num"
#######################################
# Assign an owner to every path listed in a file.
#
# Rules are applied in order and the LAST matching rule wins (CODEOWNERS
# semantics); a path matched by no rule is reported as UNOWNED.
#
# The rules path is handed to awk through the environment instead of being
# spliced into the program text, so a temp directory whose name contains a
# quote or backslash cannot corrupt the awk source.
#
# Arguments: $1 - file of "regex<TAB>owner" rules
#            $2 - file of paths, one per line
# Outputs:   "path<TAB>owner" lines on stdout, in input order
#######################################
match_files_to_owners() {
  local rules_file=$1
  local paths_file=$2
  RULES_FILE="$rules_file" awk '
    BEGIN {
      # Load every rule up front, preserving file order
      rules_file = ENVIRON["RULES_FILE"]
      while ((getline line < rules_file) > 0) {
        n = split(line, parts, "\t")
        if (n >= 2) {
          pattern_count++
          patterns[pattern_count] = parts[1]  # regex
          owners[pattern_count] = parts[2]    # owner
        }
      }
      close(rules_file)
    }
    {
      matched_owner = "UNOWNED"
      # Keep scanning after a hit: later rules override earlier ones
      for (i = 1; i <= pattern_count; i++) {
        if ($0 ~ patterns[i]) {
          matched_owner = owners[i]
        }
      }
      print $0 "\t" matched_owner
    }
  ' "$paths_file"
}

echo "Matching files to owners..."
match_files_to_owners "$TMPDIR/codeowners_regex.txt" "$TMPDIR/files.txt" \
  > "$TMPDIR/file_owners.txt"
# Bucket the changed files per owner inside $TMPDIR/owners. For each owner
# key three files are maintained:
#   <key>.files - the paths owned by that owner, one per line
#   <key>.name  - the owner exactly as written in CODEOWNERS
#   <key>.seen  - marker so the key is appended to ALL_OWNERS only once
echo "Grouping files by owner..."
mkdir -p "$TMPDIR/owners"
ALL_OWNERS=""
while IFS=$'\t' read -r file owner; do
  # Owner handles contain characters that are awkward in file names, so
  # everything outside [a-zA-Z0-9_-] is folded to "_" to form the key.
  owner_safe="$(sed 's/[^a-zA-Z0-9_-]/_/g' <<<"$owner")"
  printf '%s\n' "$file" >> "$TMPDIR/owners/$owner_safe.files"
  printf '%s\n' "$owner" > "$TMPDIR/owners/$owner_safe.name"
  # Already recorded: nothing more to do for this line.
  if [[ -f "$TMPDIR/owners/$owner_safe.seen" ]]; then
    continue
  fi
  touch "$TMPDIR/owners/$owner_safe.seen"
  if [[ -z "$ALL_OWNERS" ]]; then
    ALL_OWNERS="$owner_safe"
  else
    ALL_OWNERS="$ALL_OWNERS $owner_safe"
  fi
done < "$TMPDIR/file_owners.txt"
# Report how the changed files are distributed across owners.
printf '\n%s\n%s\n' "Files by owner:" "---------------"
# ALL_OWNERS is a space-separated list of sanitized keys, so the unquoted
# expansion below is an intentional word split.
for owner_safe in $ALL_OWNERS; do
  owner="$(<"$TMPDIR/owners/$owner_safe.name")"
  file_count="$(wc -l < "$TMPDIR/owners/$owner_safe.files" | tr -d ' ')"
  printf ' %s: %s file(s)\n' "$owner" "$file_count"
done
printf '\n'
# One revision will be produced per distinct owner key.
OWNER_COUNT="$(wc -w <<<"$ALL_OWNERS" | tr -d ' ')"
printf 'Will create %s revision(s)\n\n' "$OWNER_COUNT"
# With a single owner there is nothing to split: just re-describe the
# revision using the template and stop.
if (( OWNER_COUNT == 1 )); then
  # The list holds exactly one key; strip anything after the first space.
  owner_safe="${ALL_OWNERS%% *}"
  owner="$(<"$TMPDIR/owners/$owner_safe.name")"
  # The template is deliberately used as the printf format string.
  # shellcheck disable=SC2059
  description="$(printf "$TEMPLATE" "$owner")"
  printf '%s\n' "Only one owner found. Updating revision description..."
  jj describe -r "$REVISION" -m "$description"
  printf 'Done! Revision updated with description: %s\n' "$description"
  exit 0
fi
# Turn the space-separated owner keys into an array for the loops below.
# The keys are sanitized to [a-zA-Z0-9_-], so the word split is intentional.
# shellcheck disable=SC2206
OWNERS_ARR=($ALL_OWNERS)
FIRST_OWNER_SAFE="${OWNERS_ARR[0]}"
FIRST_OWNER=$(cat "$TMPDIR/owners/$FIRST_OWNER_SAFE.name")
echo "Processing owners..."
echo ""
# Both ids are resolved BEFORE any revision is created, because creating
# revisions moves @ and would change what a symbolic $REVISION points at.
#
# Each template ends with "\n": with --no-graph jj prints the rendered
# template for every revision back to back, so without the newline several
# ids would run together into one bogus identifier.
ORIG_CHANGE_ID=$(jj log -r "$REVISION" --no-graph -T 'change_id.short(12) ++ "\n"')
if [[ -z "$ORIG_CHANGE_ID" || "$ORIG_CHANGE_ID" == *$'\n'* ]]; then
  echo "Error: '$REVISION' must resolve to exactly one revision" >&2
  exit 1
fi
# For a revision with several parents only the first id listed by jj is
# used as the base for the new revisions.
PARENT=$(jj log -r "parents($REVISION)" --no-graph -T 'change_id.short(12) ++ "\n"' | head -1)
if [[ -z "$PARENT" ]]; then
  echo "Error: '$REVISION' has no parent revision to branch from" >&2
  exit 1
fi
# Create one new, empty revision per owner on top of the original's parent
# (i.e. as siblings of the original, which is left untouched). The change id
# of each new revision is stored in <key>.rev for the copy step.
for owner_safe in "${OWNERS_ARR[@]}"; do
  owner=$(cat "$TMPDIR/owners/$owner_safe.name")
  # The template is deliberately used as the printf format string.
  # shellcheck disable=SC2059
  description=$(printf "$TEMPLATE" "$owner")
  # `jj new` moves @ to the new revision. Its normal chatter is captured to
  # keep the output quiet, but it is replayed on failure so the script never
  # dies without an explanation.
  if ! jj_output=$(jj new "$PARENT" -m "$description" 2>&1); then
    echo "Error: could not create a revision for $owner" >&2
    printf '%s\n' "$jj_output" >&2
    exit 1
  fi
  # @ is now the new revision; record its change id
  new_rev=$(jj log -r @ --no-graph -T 'change_id.short(12)')
  echo "$new_rev" > "$TMPDIR/owners/$owner_safe.rev"
  echo "[$owner] Created new revision: $new_rev"
done
# Put the working copy back on the original revision. This stays best-effort
# (the copy step addresses revisions by id and does not depend on @), but a
# failure is now reported instead of being hidden.
if ! jj edit "$ORIG_CHANGE_ID" >/dev/null 2>&1; then
  echo "Warning: could not return the working copy to $ORIG_CHANGE_ID" >&2
fi
echo ""
echo "Copying files to new revisions..."

#######################################
# Wrap a path as an exact-file fileset pattern for jj: file:"<path>".
# jj parses positional path arguments as fileset expressions, so a bare path
# containing characters such as "(" would not be read as a plain file name.
# Backslashes and double quotes are escaped for the quoted string form.
# Arguments: $1 - path as printed by jj (relative to the current directory)
# Outputs:   the pattern on stdout, without a trailing newline
#######################################
to_file_pattern() {
  local path=$1
  path=${path//\\/\\\\}
  path=${path//\"/\\\"}
  printf 'file:"%s"' "$path"
}

# For each owner, copy that owner's files from the original revision into
# the revision created for them.
for owner_safe in "${OWNERS_ARR[@]}"; do
  owner=$(cat "$TMPDIR/owners/$owner_safe.name")
  target_rev=$(cat "$TMPDIR/owners/$owner_safe.rev")
  echo "Copying files for $owner to $target_rev..."
  # Build one argument per line of the list. Unlike xargs, this never splits
  # a path on spaces and never chokes on quotes inside a file name.
  restore_args=()
  while IFS= read -r path || [[ -n "$path" ]]; do
    restore_args+=("$(to_file_pattern "$path")")
  done < "$TMPDIR/owners/$owner_safe.files"
  echo " (${#restore_args[@]} files)"
  # All files go to jj in a single call; a failure is reported with the file
  # list but does not stop the remaining owners from being processed.
  if ! jj restore --from "$ORIG_CHANGE_ID" --to "$target_rev" "${restore_args[@]}"; then
    echo " Warning: jj restore had errors"
    echo " Files: $(tr '\n' ' ' < "$TMPDIR/owners/$owner_safe.files")"
  fi
done
# Final summary: one line per owner with the description given to its revision.
printf '\n%s\n\n' "Done! Created revisions:"
# ALL_OWNERS is a space-separated list of sanitized keys; the unquoted
# expansion is an intentional word split.
for owner_safe in $ALL_OWNERS; do
  owner="$(<"$TMPDIR/owners/$owner_safe.name")"
  # The template is deliberately used as the printf format string.
  # shellcheck disable=SC2059
  desc="$(printf "$TEMPLATE" "$owner")"
  printf ' %s -> %s\n' "$owner" "$desc"
done