diff --git a/README.md b/README.md index c1eeb88..aff3891 100644 --- a/README.md +++ b/README.md @@ -269,7 +269,7 @@ snapshots that make `--apply` reversible. 1. **Get user and org metadata** ```bash - src-auth-perms-sync sync-saml-orgs + src-auth-perms-sync sync-saml-orgs --full ``` - Queries the Sourcegraph instance for auth providers, users, users' SAML groups, and orgs @@ -278,12 +278,41 @@ snapshots that make `--apply` reversible. 2. **Apply org sync** ```bash - src-auth-perms-sync sync-saml-orgs --apply + src-auth-perms-sync sync-saml-orgs --full --apply ``` - Creates the orgs if they don't exist, and sync the members from the SAML groups to the orgs - `--sync-saml-orgs` can also be added to a `set` run, to run both at the same time +3. **Scoped org sync for selected users** + + ```bash + src-auth-perms-sync sync-saml-orgs --users alice,bob + src-auth-perms-sync sync-saml-orgs --created-after 2026-06-01 + src-auth-perms-sync sync-saml-orgs --users-without-explicit-perms + ``` + + - Same user filters as `get` and `set`; a mode flag is required — there + is no bare `sync-saml-orgs` + +### Org sync behavior + +- Org names are `synced--` (non-alphanumeric characters + become `-`). The `synced-` prefix marks tool ownership: the sync only ever + modifies orgs whose name carries it, so manually created orgs are never touched. +- The org sync mode is always explicit — no surprises: + - **Full** (`sync-saml-orgs --full`, or `set --full` / `--repos*` + `--sync-saml-orgs`): converges every synced org against all users. A synced + org whose SAML group disappeared has all members removed, but the org itself + is kept (its settings survive in case the group comes back). + - **Scoped** (user filters on `sync-saml-orgs`, or `set --users` / + `--users-without-explicit-perms` / `--created-after` with + `--sync-saml-orgs`): syncs org membership for exactly the selected users — + per-user additions AND removals, computed from each user's own SAML + assertion and org list. Other users' memberships never change, and no full + user scan or org member listing is needed, so API traffic stays + proportional to the selection. + ## Options Run `src-auth-perms-sync --help` for options diff --git a/src/src_auth_perms_sync/cli.py b/src/src_auth_perms_sync/cli.py index dd0347a..fe93498 100644 --- a/src/src_auth_perms_sync/cli.py +++ b/src/src_auth_perms_sync/cli.py @@ -92,10 +92,15 @@ ) SYNC_SAML_ORGS_CONFIG_FIELDS = src.config_field_names( *COMMON_CONFIG_FIELDS_BEFORE, + "full", + "users", + "users_without_explicit_perms", + "created_after", "apply", "no_backup", "artifacts_dir", "no_files", + "explicit_permissions_batch_size", "parallelism", *COMMON_CONFIG_FIELDS_AFTER, ) @@ -146,6 +151,12 @@ "repos_without_explicit_perms": "set_repos_without_explicit_perms_sync_saml_orgs", "repos_created_after": "set_repos_created_after_sync_saml_orgs", } +SYNC_SAML_ORGS_ARTIFACT_NAMES: dict[str, str] = { + "full": "sync-saml-orgs-full-{run_mode}", + "users": "sync-saml-orgs-users-{run_mode}", + "users_without_explicit_perms": "sync-saml-orgs-users-without-explicit-perms-{run_mode}", + "created_after": "sync-saml-orgs-created-after-{run_mode}", +} SYNC_SET_COMMAND_ARTIFACT_NAMES: dict[permission_types.SetCommandMode, str] = { "full": "set-sync-saml-orgs-{run_mode}", "users": "set-add-users-sync-saml-orgs-{run_mode}", @@ -431,6 +442,7 @@ def validate_config(command_name: CommandName, config: Config) -> None: validate_user_filter_selection(command_name, config) validate_repository_filter_selection(command_name, config) validate_set_mode_selection(command_name, config) + validate_sync_saml_orgs_mode_selection(command_name, config) def validate_command_options(command_name: CommandName, config: Config) -> None: @@ -460,10 +472,11 @@ def validate_user_filter_selection(command_name: CommandName, config: Config) -> config_error("choose only one of --users or --users-without-explicit-perms") user_filter_selected = user_scope_filter_count > 0 or config.created_after is not None - user_filter_allowed = command_name in {"get", "set"} + user_filter_allowed = command_name in {"get", "set", "sync_saml_orgs"} if user_filter_selected and not user_filter_allowed: config_error( - "--users, --users-without-explicit-perms, and --created-after require get or set" + "--users, --users-without-explicit-perms, and --created-after " + "require get, set, or sync-saml-orgs" ) @@ -495,10 +508,39 @@ def validate_repository_filter_selection(command_name: CommandName, config: Conf config_error("choose either user filters or repo filters, not both") +def validate_sync_saml_orgs_mode_selection(command_name: CommandName, config: Config) -> None: + """Validate sync-saml-orgs command mode flags.""" + if command_name != "sync_saml_orgs": + return + + if config.full and config.created_after is not None: + config_error( + "--full cannot be combined with --created-after; full mode already syncs every user" + ) + if config.full and (config.users or config.users_without_explicit_perms): + config_error( + "with sync-saml-orgs, choose at most one of --full, --users, " + "or --users-without-explicit-perms" + ) + mode_selected = any( + ( + config.full, + bool(config.users), + config.users_without_explicit_perms, + config.created_after is not None, + ) + ) + if not mode_selected: + config_error( + "sync-saml-orgs requires one of --full, --users, " + "--users-without-explicit-perms, or --created-after" + ) + + def validate_set_mode_selection(command_name: CommandName, config: Config) -> None: """Validate set command mode flags.""" - if config.full and command_name != "set": - config_error("--full requires the set command") + if config.full and command_name not in {"set", "sync_saml_orgs"}: + config_error("--full requires the set or sync-saml-orgs command") if command_name != "set": return @@ -600,11 +642,24 @@ def resolve_command(command_name: CommandName, config: Config) -> ResolvedComman return ResolvedCommand( name="sync_saml_orgs", log_name="sync_saml_orgs", - artifact_name=f"sync-saml-orgs-{run_mode}", + artifact_name=SYNC_SAML_ORGS_ARTIFACT_NAMES[sync_saml_orgs_mode(config)].format( + run_mode=run_mode + ), sync_saml_organizations=True, ) +def sync_saml_orgs_mode(config: Config) -> str: + """Return the validated standalone sync-saml-orgs mode.""" + if config.users: + return "users" + if config.users_without_explicit_perms: + return "users_without_explicit_perms" + if config.created_after is not None: + return "created_after" + return "full" + + def resolve_set_command(config: Config, run_mode: str) -> ResolvedCommand: """Return resolved metadata for the selected set command mode.""" set_options = set_command_options(config) @@ -790,6 +845,8 @@ def run_command( run_restore(config, client, sourcegraph_site_config, run_paths, worker_pool) return command_data else: + # Standalone command: the config's user filters (or --full) choose + # between a scoped and a full org sync. run_sync_saml_organizations( config, client, @@ -797,6 +854,7 @@ def run_command( command_data, run_paths, worker_pool, + standalone=True, ) return command_data @@ -883,8 +941,16 @@ def run_sync_saml_organizations( command_data: run_context.CommandData, run_paths: backups.RunPaths, worker_pool: ThreadPoolExecutor, + *, + standalone: bool = False, ) -> None: - """Run the selected SAML organization sync command.""" + """Run the selected SAML organization sync command. + + Only the standalone command forwards the config's user filters: in + combined `set ... --sync-saml-orgs` runs those filters belong to the + set phase, which already hands over its selected users via + `command_data`. + """ organizations_command.cmd_sync_saml_organizations( client, run_paths, @@ -895,6 +961,10 @@ def run_sync_saml_organizations( ), do_backup=not config.no_backup, command_data=command_data, + user_identifiers=config.users if standalone else (), + users_without_explicit_perms=(config.users_without_explicit_perms if standalone else False), + user_created_after=config.created_after if standalone else None, + explicit_permissions_batch_size=config.explicit_permissions_batch_size, worker_pool=worker_pool, ) diff --git a/src/src_auth_perms_sync/orgs/queries.py b/src/src_auth_perms_sync/orgs/queries.py index bb6888c..ad76b3c 100644 --- a/src/src_auth_perms_sync/orgs/queries.py +++ b/src/src_auth_perms_sync/orgs/queries.py @@ -2,6 +2,52 @@ from __future__ import annotations +QUERY_CURRENT_USER = """ +query SamlOrganizationSyncCurrentUser { + currentUser { id username } +} +""" + +# One search request finds every tool-managed (`synced-` prefixed) org. +# totalCount > len(nodes) signals truncation; callers must then fall back +# to per-name lookups. +QUERY_SYNCED_ORGANIZATIONS = """ +query SyncedOrganizations($first: Int!, $query: String!) { + currentUser { id username } + organizations(first: $first, query: $query) { + totalCount + nodes { + id + name + } + } +} +""" + + +def users_organizations_batch_query(batch_size: int) -> str: + """Fetch many users' org memberships in one aliased `node()` request. + + Used to validate a scoped org sync: re-reading each scoped user's own + org list is far cheaper than paging every touched org's member list. + """ + variables = ", ".join(f"$user{index}: ID!" for index in range(batch_size)) + aliases = "".join( + f""" + user{index}: node(id: $user{index}) {{ + ... on User {{ + id + username + organizations {{ + nodes {{ id name }} + }} + }} + }}""" + for index in range(batch_size) + ) + return f"query UsersOrganizationsBatch({variables}) {{{aliases}\n}}" + + QUERY_ORGANIZATION_MEMBERS_PAGE = """ query OrganizationMembersPage($id: ID!, $first: Int!, $after: String) { node(id: $id) { diff --git a/src/src_auth_perms_sync/orgs/sync.py b/src/src_auth_perms_sync/orgs/sync.py index ce435b7..9931280 100644 --- a/src/src_auth_perms_sync/orgs/sync.py +++ b/src/src_auth_perms_sync/orgs/sync.py @@ -5,7 +5,6 @@ import datetime import json import logging -import re import time from collections.abc import Iterable from concurrent.futures import CancelledError, ThreadPoolExecutor @@ -16,6 +15,7 @@ import src_py_lib as src from ..permissions import apply as permissions_apply +from ..permissions import command as permissions_command from ..shared import backups, run_context, saml_groups from ..shared import sourcegraph as shared_sourcegraph from ..shared import types as shared_types @@ -27,15 +27,21 @@ ORGANIZATION_LOOKUP_BATCH_SIZE: int = 50 ORGANIZATION_SEARCH_RESULT_LIMIT: int = 100 ORGANIZATION_MEMBER_PAGE_SIZE: int = 1000 -ORGANIZATION_NAME_MAX_LENGTH: int = 255 -ORGANIZATION_SNAPSHOT_SCHEMA_VERSION: int = 1 +ORGANIZATION_SNAPSHOT_SCHEMA_VERSION: int = 2 ORGANIZATION_SNAPSHOT_DIFF_SCHEMA_VERSION: int = 1 +# One `organizations(query: "synced-")` search discovers every tool-managed +# org in a single request. Sourcegraph caps `first:` at 5000 on most +# connections; when an instance exceeds the returned page we fall back to +# per-name lookups (and skip orphaned-org cleanup) rather than miss orgs. +SYNCED_ORGANIZATION_SEARCH_LIMIT: int = 5000 -_ORGANIZATION_NAME_PART_RE = re.compile(r"[^A-Za-z0-9]+") -_ORGANIZATION_NAME_DASH_RUN_RE = re.compile(r"-+") _ALREADY_MEMBER_TEXT = "user is already a member of the organization" _ORGANIZATION_EXISTS_TEXT = "organization name is already taken" +# Re-exported here for callers that think in org-sync terms; the naming +# rule lives in shared.saml_groups so user compaction can share it. +organization_name_for_saml_group = saml_groups.organization_name_for_saml_group + @dataclass(frozen=True) class _OrganizationSyncState: @@ -92,24 +98,158 @@ def _load_organization_sync_state( client, command_data.saml_group_users, ) + + discovered = _discover_synced_organization_states(client) + if discovered is None: + # Truncated discovery: resolve target names individually and skip + # orphaned-org cleanup this run. + if not targets: + log.warning("No SAML group memberships found in user accountData — nothing to sync.") + return None + current_user, current_states = _load_current_organization_states( + client, + sorted(targets), + parallelism, + worker_pool, + ) + else: + current_user, synced_states = discovered + for organization_name in sorted(set(synced_states) - set(targets)): + # The SAML group behind this synced org is gone (or lost its + # last member): remove every member but keep the org so admin + # settings survive if the group comes back. + targets[organization_name] = organization_types.TargetOrganization( + name=organization_name, + provider_config_id="", + saml_group="", + ) + log.info( + "Synced org %s no longer matches any SAML group: " + "removing all members, keeping the org.", + organization_name, + ) + if not targets: + log.warning( + "No SAML group memberships found in user accountData " + "and no synced orgs exist — nothing to sync." + ) + return None + current_states = { + organization_name: synced_states.get(organization_name) + or organization_types.OrganizationState( + id=None, + name=organization_name, + members_by_id={}, + ) + for organization_name in targets + } + with src.span( + "load_current_organization_states", + organization_count=len(targets), + member_page_size=ORGANIZATION_MEMBER_PAGE_SIZE, + ) as load_event: + _fetch_members_into_states(client, current_states, parallelism, worker_pool, load_event) + + command_event["target_organizations"] = len(targets) + command_event["desired_memberships"] = sum( + len(target.desired_members_by_id) for target in targets.values() + ) + before_snapshot = _snapshot_from_states(client.endpoint, targets, current_states) + plan = _plan_organization_sync(targets, current_states, current_user) + expected_states = _expected_states_from_targets(targets, current_states) + expected_snapshot = _snapshot_from_states(client.endpoint, targets, expected_states) + return _OrganizationSyncState( + targets=targets, + current_user=current_user, + current_states=current_states, + before_snapshot=before_snapshot, + plan=plan, + expected_snapshot=expected_snapshot, + ) + + +def _load_scoped_organization_sync_state( + client: src.SourcegraphClient, + scoped_users: list[shared_types.ScopedSamlGroupUser], + parallelism: int, + command_event: dict[str, Any], + worker_pool: ThreadPoolExecutor | None = None, +) -> _OrganizationSyncState | None: + """Plan a per-user org sync covering only the given users. + + Each user's `accountData` is the complete truth for that user's SAML + groups, and their own org list is the complete truth for their current + synced-org memberships — so additions AND removals are both safe per + user. Users outside the scope are never touched, and neither full user + streams nor org member pages are loaded. + """ + log.info( + "Scoped SAML org sync: planning membership for %d selected user(s) only; " + "no other users' org memberships will change.", + len(scoped_users), + ) + command_event["scoped_user_count"] = len(scoped_users) + targets: dict[str, organization_types.TargetOrganization] = {} + collisions: set[str] = set() + for scoped_user in scoped_users: + _record_saml_group_user_memberships(targets, collisions, scoped_user) + _raise_for_target_collisions(collisions) + + current_members_by_organization: dict[str, dict[str, organization_types.OrgMember]] = {} + organization_ids_by_name: dict[str, str] = {} + for scoped_user in scoped_users: + for organization in scoped_user.synced_organizations: + organization_ids_by_name[organization["name"]] = organization["id"] + current_members_by_organization.setdefault(organization["name"], {})[ + scoped_user.user_id + ] = {"id": scoped_user.user_id, "username": scoped_user.username} + for organization_name in sorted(set(current_members_by_organization) - set(targets)): + # In-scope user(s) are members of a synced org that matches none of + # their SAML groups any more: plan their removal (org kept). + targets[organization_name] = organization_types.TargetOrganization( + name=organization_name, + provider_config_id="", + saml_group="", + ) + command_event["target_organizations"] = len(targets) command_event["desired_memberships"] = sum( len(target.desired_members_by_id) for target in targets.values() ) if not targets: - log.warning("No SAML group memberships found in user accountData — nothing to sync.") + log.info("Selected user(s) hold no SAML group or synced org memberships — nothing to sync.") return None - current_user, current_states = _load_current_organization_states( + # Org IDs for orgs the scoped users belong to come from their own org + # lists; only the remaining target names need a lookup (also yielding + # currentUser for the create path). No member pages are fetched. + names_needing_lookup = sorted(set(targets) - set(organization_ids_by_name)) + current_user, looked_up_states = _lookup_organization_states( client, - sorted(targets), + names_needing_lookup, parallelism, worker_pool, ) - before_snapshot = _snapshot_from_states(client.endpoint, targets, current_states) + current_states: dict[str, organization_types.OrganizationState] = {} + for organization_name in targets: + known_id = organization_ids_by_name.get(organization_name) + if known_id is not None: + state = organization_types.OrganizationState( + id=known_id, + name=organization_name, + members_by_id={}, + ) + else: + state = looked_up_states[organization_name] + state.members_by_id = dict(current_members_by_organization.get(organization_name, {})) + current_states[organization_name] = state + + before_snapshot = _snapshot_from_states(client.endpoint, targets, current_states, scope="users") plan = _plan_organization_sync(targets, current_states, current_user) expected_states = _expected_states_from_targets(targets, current_states) - expected_snapshot = _snapshot_from_states(client.endpoint, targets, expected_states) + expected_snapshot = _snapshot_from_states( + client.endpoint, targets, expected_states, scope="users" + ) return _OrganizationSyncState( targets=targets, current_user=current_user, @@ -120,6 +260,45 @@ def _load_organization_sync_state( ) +def _load_scoped_users_for_filters( + client: src.SourcegraphClient, + *, + user_identifiers: tuple[str, ...], + users_without_explicit_perms: bool, + user_created_after: str | None, + parallelism: int, + explicit_permissions_batch_size: int, + saml_groups_attribute_name_by_config_id: dict[str, str], + worker_pool: ThreadPoolExecutor | None = None, +) -> list[shared_types.ScopedSamlGroupUser]: + """Load and compact the users a standalone scoped org sync covers. + + Reuses the get/set user-selection pipeline; accountData and each + user's org memberships ride along in the same queries. + """ + users = permissions_command.load_selected_users( + client, + user_identifiers=user_identifiers, + users_without_explicit_perms=users_without_explicit_perms, + user_created_after=user_created_after, + parallelism=parallelism, + explicit_permissions_batch_size=explicit_permissions_batch_size, + include_account_data=True, + include_organizations=True, + worker_pool=worker_pool, + ) + log.info("Querying auth providers from %s ...", client.endpoint) + providers = shared_sourcegraph.list_auth_providers(client) + attribute_names_by_provider = saml_groups.attribute_names_by_provider_key( + providers, saml_groups_attribute_name_by_config_id + ) + return saml_groups.compact_scoped_saml_group_users( + users, + providers, + attribute_names_by_provider, + ) + + def _log_organization_sync_plan(sync_state: _OrganizationSyncState) -> None: log.info( "Organization sync plan: create %d org(s), add %d member(s), remove %d member(s).", @@ -288,6 +467,113 @@ def _finish_organization_apply_with_backup( log.info("To inspect the pre-sync org membership state, read:\n %s", before_path) +def _finish_scoped_organization_apply_with_backup( + client: src.SourcegraphClient, + run_paths: backups.RunPaths, + sync_state: _OrganizationSyncState, + scoped_users: list[shared_types.ScopedSamlGroupUser], + before_path: Path | None, + parallelism: int, + worker_pool: ThreadPoolExecutor | None = None, +) -> None: + """Validate a scoped apply by re-reading only the scoped users' org lists.""" + after_states = _load_scoped_after_states( + client, + sync_state, + scoped_users, + parallelism, + worker_pool, + ) + after_snapshot = _snapshot_from_states( + client.endpoint, sync_state.targets, after_states, scope="users" + ) + if run_paths.write_files: + after_path, diff_path = _write_organization_after_snapshot( + run_paths, + sync_state.before_snapshot, + after_snapshot, + ) + log.info("Wrote after org snapshot: %s diff=%s.", after_path, diff_path) + else: + log.info("Skipping after and diff org snapshots because --no-files is set.") + _validate_organization_sync(after_snapshot, sync_state.expected_snapshot) + if before_path is not None: + log.info("To inspect the pre-sync org membership state, read:\n %s", before_path) + + +def _load_scoped_after_states( + client: src.SourcegraphClient, + sync_state: _OrganizationSyncState, + scoped_users: list[shared_types.ScopedSamlGroupUser], + parallelism: int, + worker_pool: ThreadPoolExecutor | None = None, +) -> dict[str, organization_types.OrganizationState]: + """Rebuild target org states from the scoped users' refreshed org lists.""" + organizations_by_user_id = _fetch_users_organizations( + client, + [scoped_user.user_id for scoped_user in scoped_users], + parallelism, + worker_pool, + ) + after_states = { + organization_name: organization_types.OrganizationState( + id=sync_state.current_states[organization_name].id, + name=organization_name, + members_by_id={}, + ) + for organization_name in sync_state.targets + } + for scoped_user in scoped_users: + for organization in organizations_by_user_id.get(scoped_user.user_id, []): + state = after_states.get(organization["name"]) + if state is not None: + state.members_by_id[scoped_user.user_id] = { + "id": scoped_user.user_id, + "username": scoped_user.username, + } + return after_states + + +def _fetch_users_organizations( + client: src.SourcegraphClient, + user_ids: list[str], + parallelism: int, + worker_pool: ThreadPoolExecutor | None = None, +) -> dict[str, list[shared_types.OrganizationReference]]: + """Fetch many users' org memberships via aliased batch lookups.""" + + def fetch_batch( + batch: list[str], + ) -> list[tuple[str, list[shared_types.OrganizationReference]]]: + data = client.graphql( + queries.users_organizations_batch_query(len(batch)), + cast(src.JSONDict, {f"user{index}": user_id for index, user_id in enumerate(batch)}), + follow_pages=False, + ) + batch_organizations: list[tuple[str, list[shared_types.OrganizationReference]]] = [] + for index, user_id in enumerate(batch): + node = cast(dict[str, Any] | None, data.get(f"user{index}")) + organizations: list[shared_types.OrganizationReference] = [] + if node: + organizations = cast( + list[shared_types.OrganizationReference], + node["organizations"]["nodes"], + ) + batch_organizations.append((user_id, organizations)) + return batch_organizations + + organizations_by_user_id: dict[str, list[shared_types.OrganizationReference]] = {} + for batch_results in run_context.parallel_map( + fetch_batch, + list(_chunks(user_ids, ORGANIZATION_LOOKUP_BATCH_SIZE)), + parallelism=parallelism, + worker_pool=worker_pool, + ): + for user_id, organizations in batch_results: + organizations_by_user_id[user_id] = organizations + return organizations_by_user_id + + def _write_organization_after_snapshot( run_paths: backups.RunPaths, before_snapshot: organization_types.OrganizationSnapshot, @@ -323,30 +609,77 @@ def cmd_sync_saml_organizations( saml_groups_attribute_name_by_config_id: dict[str, str], do_backup: bool, command_data: run_context.CommandData | None = None, + user_identifiers: tuple[str, ...] = (), + users_without_explicit_perms: bool = False, + user_created_after: str | None = None, + explicit_permissions_batch_size: int = 50, worker_pool: ThreadPoolExecutor | None = None, ) -> None: - """Create/update Sourcegraph orgs from every discovered SAML group. + """Create/update Sourcegraph orgs from discovered SAML groups. Org names are deterministic and config-free: the Sourcegraph-safe form - of `-`. Invalid org-name + of `synced--`. Invalid org-name characters are converted to `-`; any resulting name collision fails before mutation so we never merge unrelated SAML groups accidentally. + The `synced-` prefix marks tool ownership: only orgs carrying it are + ever modified. + + Two modes: + + - Full (`sync-saml-orgs --full`, or after a full-overwrite set): + converges every synced org to the complete user population, + including emptying (but never deleting) synced orgs whose SAML + group disappeared. + - Scoped: per-user additions and removals for exactly the selected + users; no other users or orgs are touched and no full user stream + or member pages are loaded. The scope comes either from a preceding + additive set (`command_data.scoped_saml_group_users`) or from this + command's own user filters (`user_identifiers`, + `users_without_explicit_perms`, `user_created_after`). """ + resolved_command_data = command_data or run_context.CommandData() + scoped_users = resolved_command_data.scoped_saml_group_users + user_filters_selected = ( + bool(user_identifiers) or users_without_explicit_perms or user_created_after is not None + ) with src.span( "cmd_sync_saml_organizations", dry_run=dry_run, parallelism=parallelism, do_backup=do_backup, + scoped=scoped_users is not None or user_filters_selected, ) as command_event: - sync_state = _load_organization_sync_state( - client, - saml_groups_attribute_name_by_config_id, - parallelism, - command_event, - command_data or run_context.CommandData(), - worker_pool, - ) + if scoped_users is None and user_filters_selected: + scoped_users = _load_scoped_users_for_filters( + client, + user_identifiers=user_identifiers, + users_without_explicit_perms=users_without_explicit_perms, + user_created_after=user_created_after, + parallelism=parallelism, + explicit_permissions_batch_size=explicit_permissions_batch_size, + saml_groups_attribute_name_by_config_id=(saml_groups_attribute_name_by_config_id), + worker_pool=worker_pool, + ) + if scoped_users is not None: + sync_state = _load_scoped_organization_sync_state( + client, + scoped_users, + parallelism, + command_event, + worker_pool, + ) + else: + sync_state = _load_organization_sync_state( + client, + saml_groups_attribute_name_by_config_id, + parallelism, + command_event, + resolved_command_data, + worker_pool, + ) if sync_state is None: + if dry_run: + log.info("Dry run complete. Nothing to sync, so --apply would make no changes.") return _log_organization_sync_plan(sync_state) @@ -372,14 +705,25 @@ def cmd_sync_saml_organizations( _record_organization_apply_event(command_event, apply_result) if do_backup: - _finish_organization_apply_with_backup( - client, - run_paths, - sync_state, - before_path, - parallelism, - worker_pool, - ) + if scoped_users is not None: + _finish_scoped_organization_apply_with_backup( + client, + run_paths, + sync_state, + scoped_users, + before_path, + parallelism, + worker_pool, + ) + else: + _finish_organization_apply_with_backup( + client, + run_paths, + sync_state, + before_path, + parallelism, + worker_pool, + ) _raise_for_failed_organization_sync(apply_result) @@ -427,7 +771,7 @@ def _collect_target_organizations( def _record_saml_group_user_memberships( targets: dict[str, organization_types.TargetOrganization], collisions: set[str], - user: shared_types.SamlGroupUser, + user: shared_types.SamlGroupUser | shared_types.ScopedSamlGroupUser, ) -> None: for membership in user.saml_group_memberships: _record_target_organization_membership( @@ -444,7 +788,7 @@ def _record_target_organization_membership( collisions: set[str], provider_config_id: str, group_name: str, - user: shared_types.SamlGroupUser, + user: shared_types.SamlGroupUser | shared_types.ScopedSamlGroupUser, ) -> None: organization_name = organization_name_for_saml_group(provider_config_id, group_name) existing_target = targets.get(organization_name) @@ -509,89 +853,145 @@ def _log_target_collection_summary( ) -def organization_name_for_saml_group(provider_config_id: str, group_name: str) -> str: - provider_part = _organization_name_part(provider_config_id, "auth provider configID") - group_part = _organization_name_part(group_name, "SAML group name") - organization_name = f"{provider_part}-{group_part}" - if len(organization_name) > ORGANIZATION_NAME_MAX_LENGTH: - raise SystemExit( - f"FATAL: generated org name for configID={provider_config_id!r} " - f"group={group_name!r} is {len(organization_name)} characters; " - f"Sourcegraph org names must be <= {ORGANIZATION_NAME_MAX_LENGTH}." - ) - return organization_name - - -def _organization_name_part(value: str, label: str) -> str: - normalized = _ORGANIZATION_NAME_PART_RE.sub("-", value.strip()) - normalized = _ORGANIZATION_NAME_DASH_RUN_RE.sub("-", normalized).strip("-") - if not normalized: - raise SystemExit( - f"FATAL: {label} {value!r} cannot be converted to a Sourcegraph org-name part." - ) - return normalized - - def _load_current_organization_states( client: src.SourcegraphClient, organization_names: list[str], parallelism: int, worker_pool: ThreadPoolExecutor | None = None, ) -> tuple[organization_types.OrgMember, dict[str, organization_types.OrganizationState]]: - states: dict[str, organization_types.OrganizationState] = {} - current_user: organization_types.OrgMember | None = None - name_batches = list(_chunks(organization_names, ORGANIZATION_LOOKUP_BATCH_SIZE)) + """Resolve target org IDs and load their full member lists.""" with src.span( "load_current_organization_states", organization_count=len(organization_names), - lookup_batch_count=len(name_batches), member_page_size=ORGANIZATION_MEMBER_PAGE_SIZE, ) as load_event: + current_user, states = _lookup_organization_states( + client, + organization_names, + parallelism, + worker_pool, + ) + _fetch_members_into_states(client, states, parallelism, worker_pool, load_event) + return current_user, states - def fetch_organization_batch( - batch: list[str], - ) -> organization_types.OrganizationBatchLookup: - return _fetch_organization_batch(client, batch) - for result in run_context.parallel_map( - fetch_organization_batch, - name_batches, - parallelism=parallelism, - worker_pool=worker_pool, - ): - batch_current_user = result["current_user"] - if current_user is None: - current_user = batch_current_user - elif current_user["id"] != batch_current_user["id"]: - raise RuntimeError( - "currentUser changed between organization lookup batches " - f"({current_user['username']} vs {batch_current_user['username']})" - ) - states.update(result["states"]) +def _lookup_organization_states( + client: src.SourcegraphClient, + organization_names: list[str], + parallelism: int, + worker_pool: ThreadPoolExecutor | None = None, +) -> tuple[organization_types.OrgMember, dict[str, organization_types.OrganizationState]]: + """Resolve org names to IDs (no member pages) via aliased batch lookups.""" + states: dict[str, organization_types.OrganizationState] = {} + current_user: organization_types.OrgMember | None = None + name_batches = list(_chunks(organization_names, ORGANIZATION_LOOKUP_BATCH_SIZE)) - existing_states = [state for state in states.values() if state.id is not None] - load_event["existing_organizations_needing_member_pages"] = len(existing_states) + def fetch_organization_batch( + batch: list[str], + ) -> organization_types.OrganizationBatchLookup: + return _fetch_organization_batch(client, batch) - def fetch_members( - state: organization_types.OrganizationState, - ) -> tuple[organization_types.OrganizationState, list[organization_types.OrgMember]]: - return state, _fetch_all_members(client, state) + for result in run_context.parallel_map( + fetch_organization_batch, + name_batches, + parallelism=parallelism, + worker_pool=worker_pool, + ): + batch_current_user = result["current_user"] + if current_user is None: + current_user = batch_current_user + elif current_user["id"] != batch_current_user["id"]: + raise RuntimeError( + "currentUser changed between organization lookup batches " + f"({current_user['username']} vs {batch_current_user['username']})" + ) + states.update(result["states"]) + + if current_user is None: + current_user = _fetch_current_user(client) + return current_user, states - for state, members in run_context.parallel_map( - fetch_members, - existing_states, - parallelism=parallelism, - worker_pool=worker_pool, - ): - for member in members: - state.members_by_id[member["id"]] = member - load_event["existing_organizations"] = sum(1 for state in states.values() if state.id) - load_event["total_current_members"] = sum( - len(state.members_by_id) for state in states.values() - ) +def _fetch_members_into_states( + client: src.SourcegraphClient, + states: dict[str, organization_types.OrganizationState], + parallelism: int, + worker_pool: ThreadPoolExecutor | None, + load_event: dict[str, Any], +) -> None: + """Page every existing org's full member list into its state.""" + existing_states = [state for state in states.values() if state.id is not None] + load_event["existing_organizations_needing_member_pages"] = len(existing_states) + + def fetch_members( + state: organization_types.OrganizationState, + ) -> tuple[organization_types.OrganizationState, list[organization_types.OrgMember]]: + return state, _fetch_all_members(client, state) + + for state, members in run_context.parallel_map( + fetch_members, + existing_states, + parallelism=parallelism, + worker_pool=worker_pool, + ): + for member in members: + state.members_by_id[member["id"]] = member + load_event["existing_organizations"] = len(existing_states) + load_event["total_current_members"] = sum(len(state.members_by_id) for state in states.values()) + + +def _fetch_current_user(client: src.SourcegraphClient) -> organization_types.OrgMember: + data = client.graphql(queries.QUERY_CURRENT_USER) + current_user = cast(organization_types.OrgMember | None, data.get("currentUser")) if current_user is None: - raise RuntimeError("currentUser was not returned while loading organizations") + raise RuntimeError("currentUser is null; SAML org sync requires an authenticated token") + return current_user + + +def _discover_synced_organization_states( + client: src.SourcegraphClient, +) -> tuple[organization_types.OrgMember, dict[str, organization_types.OrganizationState]] | None: + """Find every existing `synced-` org (ID + name) in one search request. + + Returns None when the search result was truncated (more matches than + `SYNCED_ORGANIZATION_SEARCH_LIMIT`); callers must then fall back to + per-name lookups and skip orphaned-org cleanup. + """ + with src.span("discover_synced_organizations") as discovery_event: + data = client.graphql( + queries.QUERY_SYNCED_ORGANIZATIONS, + { + "first": SYNCED_ORGANIZATION_SEARCH_LIMIT, + "query": saml_groups.SYNCED_ORGANIZATION_NAME_PREFIX, + }, + ) + current_user = cast(organization_types.OrgMember | None, data.get("currentUser")) + if current_user is None: + raise RuntimeError("currentUser is null; SAML org sync requires an authenticated token") + connection = cast(dict[str, Any], data["organizations"]) + raw_organizations = cast(list[dict[str, Any]], connection["nodes"]) + total_count = cast(int, connection["totalCount"]) + discovery_event["total_count"] = total_count + discovery_event["returned_count"] = len(raw_organizations) + if total_count > len(raw_organizations): + log.warning( + "Synced-org discovery returned %d of %d matches; falling back to " + "per-name lookups and skipping orphaned synced-org cleanup this run.", + len(raw_organizations), + total_count, + ) + return None + # The search also matches display names; keep only true synced- names. + states = { + raw_organization["name"]: organization_types.OrganizationState( + id=cast(str, raw_organization["id"]), + name=cast(str, raw_organization["name"]), + members_by_id={}, + ) + for raw_organization in raw_organizations + if saml_groups.is_synced_organization_name(cast(str, raw_organization["name"])) + } + discovery_event["synced_organizations"] = len(states) return current_user, states @@ -990,6 +1390,8 @@ def _snapshot_from_states( endpoint: str, targets: dict[str, organization_types.TargetOrganization], states: dict[str, organization_types.OrganizationState], + *, + scope: organization_types.OrganizationSyncScope = "full", ) -> organization_types.OrganizationSnapshot: organizations: dict[str, organization_types.OrganizationSnapshotEntry] = {} for organization_name, target in sorted(targets.items()): @@ -1005,6 +1407,9 @@ def _snapshot_from_states( "schema_version": ORGANIZATION_SNAPSHOT_SCHEMA_VERSION, "captured_at": datetime.datetime.now(datetime.UTC).isoformat(timespec="seconds"), "endpoint": endpoint, + # "users" snapshots cover only the scoped users' memberships per + # org, not each org's full member list. + "scope": scope, "stats": { "target_organizations": len(organizations), "existing_organizations": sum( diff --git a/src/src_auth_perms_sync/orgs/types.py b/src/src_auth_perms_sync/orgs/types.py index 9924a91..fc44fb2 100644 --- a/src/src_auth_perms_sync/orgs/types.py +++ b/src/src_auth_perms_sync/orgs/types.py @@ -8,6 +8,9 @@ from ..shared import types as shared_types OrganizationChangeKind: TypeAlias = Literal["add", "remove"] +# "full": converge whole orgs against the complete user population. +# "users": per-user sync covering only the run's selected users. +OrganizationSyncScope: TypeAlias = Literal["full", "users"] class OrgMember(TypedDict): @@ -39,6 +42,9 @@ class OrganizationSnapshot(TypedDict): schema_version: int captured_at: str endpoint: str + # "users"-scoped snapshots record only the scoped users' memberships + # per org, not each org's full member list. + scope: OrganizationSyncScope stats: OrganizationSnapshotStats organizations: dict[str, OrganizationSnapshotEntry] diff --git a/src/src_auth_perms_sync/permissions/command.py b/src/src_auth_perms_sync/permissions/command.py index b968a18..d0aa5a1 100644 --- a/src/src_auth_perms_sync/permissions/command.py +++ b/src/src_auth_perms_sync/permissions/command.py @@ -274,7 +274,7 @@ def cmd_get( cmd_event["external_service_count"] = len(services) include_user_account_data = _providers_need_saml_account_data(raw_providers) - users = _load_get_users( + users = load_selected_users( client, user_identifiers=user_identifiers, users_without_explicit_perms=users_without_explicit_perms, @@ -394,7 +394,7 @@ def cmd_get( ) -def _load_get_users( +def load_selected_users( client: src.SourcegraphClient, *, user_identifiers: tuple[str, ...], @@ -403,14 +403,21 @@ def _load_get_users( parallelism: int, explicit_permissions_batch_size: int, include_account_data: bool, + include_organizations: bool = False, worker_pool: ThreadPoolExecutor | None, ) -> list[shared_types.User]: - """Load the Sourcegraph users selected by get/set-compatible user filters.""" + """Load the Sourcegraph users selected by the shared user filters. + + Used by the get command and by the standalone scoped sync-saml-orgs + modes; `include_organizations` rides the users' org memberships along + in the same queries for scoped org sync. + """ if user_identifiers: users = _resolve_user_identifiers( client, user_identifiers, include_account_data=include_account_data, + include_organizations=include_organizations, ) if user_created_after is None: return users @@ -463,19 +470,25 @@ def _load_get_users( client, candidates, include_account_data=include_account_data, + include_organizations=include_organizations, parallelism=parallelism, worker_pool=worker_pool, ) log.info("Selected %d user(s) for get output.", len(users)) return users - return _load_all_get_users(client, include_account_data=include_account_data) + return _load_all_get_users( + client, + include_account_data=include_account_data, + include_organizations=include_organizations, + ) def _load_all_get_users( client: src.SourcegraphClient, *, include_account_data: bool, + include_organizations: bool = False, ) -> list[shared_types.User]: """Load all users for get output, with progress logs for large instances.""" total_users = shared_sourcegraph.count_users(client) @@ -495,6 +508,7 @@ def _load_all_get_users( shared_sourcegraph.list_users_streaming( client, include_account_data=include_account_data, + include_organizations=include_organizations, ), start=1, ): @@ -523,6 +537,7 @@ def _hydrate_site_user_candidates( *, include_emails: bool = False, include_account_data: bool = True, + include_organizations: bool = False, parallelism: int, worker_pool: ThreadPoolExecutor | None, ) -> list[shared_types.User]: @@ -542,6 +557,7 @@ def _hydrate_site_user_candidates( [candidate["id"] for candidate in candidates], include_emails=include_emails, include_account_data=include_account_data, + include_organizations=include_organizations, parallelism=parallelism, worker_pool=worker_pool, progress_label="Hydrated selected Sourcegraph user metadata", @@ -581,6 +597,32 @@ def _log_user_planning_progress( ) +def _additive_command_data( + context: permission_types.MappingContext | None, + selected_users: list[shared_types.User], + retain_saml_group_users: bool, +) -> run_context.CommandData: + """Build an additive set command's result data. + + When `retain_saml_group_users` is set, the selected users are compacted + into `scoped_saml_group_users` so a subsequent `--sync-saml-orgs` phase + syncs org membership for exactly these users — per-user additions and + removals — without streaming all users again. An empty selection yields + an empty scope (org sync no-ops), never a full-instance sync. + """ + providers = context.providers if context is not None else None + if not retain_saml_group_users: + return run_context.CommandData(auth_providers=providers) + return run_context.CommandData( + auth_providers=providers, + scoped_saml_group_users=saml_groups.compact_scoped_saml_group_users( + selected_users, + providers or [], + context.saml_groups_attribute_names if context is not None else {}, + ), + ) + + def cmd_set( client: src.SourcegraphClient, run_paths: backups.RunPaths, @@ -688,7 +730,8 @@ def cmd_set( bind_id_mode, saml_groups_attribute_name_by_config_id, do_backup, - worker_pool, + retain_saml_group_users=retain_saml_group_users, + worker_pool=worker_pool, mapping_rules=mapping_rules, ) if options.mode == "users_without_explicit_perms": @@ -702,7 +745,8 @@ def cmd_set( bind_id_mode, saml_groups_attribute_name_by_config_id, do_backup, - worker_pool, + retain_saml_group_users=retain_saml_group_users, + worker_pool=worker_pool, mapping_rules=mapping_rules, ) if options.mode == "created_after": @@ -716,7 +760,8 @@ def cmd_set( bind_id_mode, saml_groups_attribute_name_by_config_id, do_backup, - worker_pool, + retain_saml_group_users=retain_saml_group_users, + worker_pool=worker_pool, mapping_rules=mapping_rules, ) return run_context.CommandData() @@ -732,6 +777,7 @@ def cmd_set_additive_users( bind_id_mode: str, saml_groups_attribute_name_by_config_id: dict[str, str], do_backup: bool, + retain_saml_group_users: bool = False, worker_pool: ThreadPoolExecutor | None = None, mapping_rules: list[permission_types.MappingRule] | None = None, ) -> run_context.CommandData: @@ -748,16 +794,18 @@ def cmd_set_additive_users( mapping_rules = resolve_mapping_rules(mapping_rules, run_paths.maps_path) if not mapping_rules: log.warning("No maps defined in %s — nothing to do.", run_paths.maps_path) - return run_context.CommandData() + return _additive_command_data(None, [], retain_saml_group_users) include_user_emails = permissions_mapping.mapping_rules_need_user_emails(mapping_rules) - include_user_account_data = permissions_mapping.mapping_rules_need_saml_account_data( - mapping_rules + include_user_account_data = ( + permissions_mapping.mapping_rules_need_saml_account_data(mapping_rules) + or retain_saml_group_users ) users = _resolve_user_identifiers( client, user_identifiers, include_emails=include_user_emails, include_account_data=include_user_account_data, + include_organizations=retain_saml_group_users, ) context = load_mapping_context_discovery( client, @@ -778,7 +826,7 @@ def cmd_set_additive_users( ) users = selected_users if not users: - return run_context.CommandData(auth_providers=context.providers) + return _additive_command_data(context, users, retain_saml_group_users) matching_rules = _mapping_rules_matching_selected_users(context, users) log.info( @@ -798,7 +846,7 @@ def cmd_set_additive_users( do_backup=do_backup, worker_pool=worker_pool, ) - return run_context.CommandData(auth_providers=context.providers) + return _additive_command_data(context, users, retain_saml_group_users) service_ids = _service_ids_required_by_mapping_rules(context, matching_rules) log.info( @@ -843,7 +891,7 @@ def cmd_set_additive_users( existing_repos_by_user_id=existing_repos_by_user_id, worker_pool=worker_pool, ) - return run_context.CommandData(auth_providers=context.providers) + return _additive_command_data(context, users, retain_saml_group_users) def cmd_set_additive_users_without_explicit_perms( @@ -856,6 +904,7 @@ def cmd_set_additive_users_without_explicit_perms( bind_id_mode: str, saml_groups_attribute_name_by_config_id: dict[str, str], do_backup: bool, + retain_saml_group_users: bool = False, worker_pool: ThreadPoolExecutor | None = None, mapping_rules: list[permission_types.MappingRule] | None = None, ) -> run_context.CommandData: @@ -876,15 +925,16 @@ def cmd_set_additive_users_without_explicit_perms( mapping_rules = resolve_mapping_rules(mapping_rules, run_paths.maps_path) if not mapping_rules: log.warning("No maps defined in %s — nothing to do.", run_paths.maps_path) - return run_context.CommandData() + return _additive_command_data(None, [], retain_saml_group_users) context = load_mapping_context_discovery( client, mapping_rules, saml_groups_attribute_name_by_config_id, ) include_user_emails = permissions_mapping.mapping_rules_need_user_emails(mapping_rules) - include_user_account_data = permissions_mapping.mapping_rules_need_saml_account_data( - mapping_rules + include_user_account_data = ( + permissions_mapping.mapping_rules_need_saml_account_data(mapping_rules) + or retain_saml_group_users ) # Match mapping rules BEFORE the explicit-permission check: the # per-user `permissionsInfo.repositories(source: API, first: 1)` @@ -904,6 +954,7 @@ def cmd_set_additive_users_without_explicit_perms( candidates, include_emails=include_user_emails, include_account_data=include_user_account_data, + include_organizations=retain_saml_group_users, parallelism=parallelism, worker_pool=worker_pool, ) @@ -947,7 +998,7 @@ def cmd_set_additive_users_without_explicit_perms( do_backup=do_backup, worker_pool=worker_pool, ) - return run_context.CommandData(auth_providers=context.providers) + return _additive_command_data(context, users, retain_saml_group_users) matching_rules = _mapping_rules_matching_selected_users(context, users) log.info( @@ -967,7 +1018,7 @@ def cmd_set_additive_users_without_explicit_perms( do_backup=do_backup, worker_pool=worker_pool, ) - return run_context.CommandData(auth_providers=context.providers) + return _additive_command_data(context, users, retain_saml_group_users) service_ids = _service_ids_required_by_mapping_rules(context, matching_rules) log.info( @@ -1020,7 +1071,7 @@ def cmd_set_additive_users_without_explicit_perms( do_backup=do_backup, worker_pool=worker_pool, ) - return run_context.CommandData(auth_providers=context.providers) + return _additive_command_data(context, users, retain_saml_group_users) def cmd_set_additive_created_after( @@ -1032,6 +1083,7 @@ def cmd_set_additive_created_after( bind_id_mode: str, saml_groups_attribute_name_by_config_id: dict[str, str], do_backup: bool, + retain_saml_group_users: bool = False, worker_pool: ThreadPoolExecutor | None = None, mapping_rules: list[permission_types.MappingRule] | None = None, ) -> run_context.CommandData: @@ -1050,15 +1102,16 @@ def cmd_set_additive_created_after( mapping_rules = resolve_mapping_rules(mapping_rules, run_paths.maps_path) if not mapping_rules: log.warning("No maps defined in %s — nothing to do.", run_paths.maps_path) - return run_context.CommandData() + return _additive_command_data(None, [], retain_saml_group_users) context = load_mapping_context_discovery( client, mapping_rules, saml_groups_attribute_name_by_config_id, ) include_user_emails = permissions_mapping.mapping_rules_need_user_emails(mapping_rules) - include_user_account_data = permissions_mapping.mapping_rules_need_saml_account_data( - mapping_rules + include_user_account_data = ( + permissions_mapping.mapping_rules_need_saml_account_data(mapping_rules) + or retain_saml_group_users ) candidates = permissions_sourcegraph.list_site_user_candidates( client, @@ -1076,6 +1129,7 @@ def cmd_set_additive_created_after( candidates, include_emails=include_user_emails, include_account_data=include_user_account_data, + include_organizations=retain_saml_group_users, parallelism=parallelism, worker_pool=worker_pool, ) @@ -1091,7 +1145,7 @@ def cmd_set_additive_created_after( do_backup=do_backup, worker_pool=worker_pool, ) - return run_context.CommandData(auth_providers=context.providers) + return _additive_command_data(context, users, retain_saml_group_users) matching_rules = _mapping_rules_matching_selected_users(context, users) log.info( @@ -1111,7 +1165,7 @@ def cmd_set_additive_created_after( do_backup=do_backup, worker_pool=worker_pool, ) - return run_context.CommandData(auth_providers=context.providers) + return _additive_command_data(context, users, retain_saml_group_users) service_ids = _service_ids_required_by_mapping_rules(context, matching_rules) log.info( @@ -1156,7 +1210,7 @@ def cmd_set_additive_created_after( existing_repos_by_user_id=existing_repos_by_user_id, worker_pool=worker_pool, ) - return run_context.CommandData(auth_providers=context.providers) + return _additive_command_data(context, users, retain_saml_group_users) def _resolve_user_identifiers( @@ -1165,6 +1219,7 @@ def _resolve_user_identifiers( *, include_emails: bool = False, include_account_data: bool = True, + include_organizations: bool = False, ) -> list[shared_types.User]: """Resolve username/email inputs to distinct Sourcegraph users in caller order.""" users: list[shared_types.User] = [] @@ -1175,6 +1230,7 @@ def _resolve_user_identifiers( user_identifier, include_emails=include_emails, include_account_data=include_account_data, + include_organizations=include_organizations, ) if user["id"] in seen_user_ids: continue @@ -1189,6 +1245,7 @@ def _resolve_user_identifier( *, include_emails: bool = False, include_account_data: bool = True, + include_organizations: bool = False, ) -> shared_types.User: """Resolve username/email input to one Sourcegraph user.""" user: shared_types.User | None @@ -1198,11 +1255,13 @@ def _resolve_user_identifier( user_identifier, include_emails=include_emails, include_account_data=include_account_data, + include_organizations=include_organizations, ) or permissions_sourcegraph.get_user_by_username( client, user_identifier, include_emails=include_emails, include_account_data=include_account_data, + include_organizations=include_organizations, ) else: user = permissions_sourcegraph.get_user_by_username( @@ -1210,11 +1269,13 @@ def _resolve_user_identifier( user_identifier, include_emails=include_emails, include_account_data=include_account_data, + include_organizations=include_organizations, ) or permissions_sourcegraph.get_user_by_email( client, user_identifier, include_emails=include_emails, include_account_data=include_account_data, + include_organizations=include_organizations, ) if user is None: raise SystemExit(f"No Sourcegraph user found for {user_identifier!r}.") diff --git a/src/src_auth_perms_sync/permissions/queries.py b/src/src_auth_perms_sync/permissions/queries.py index 5d5fcb3..ae5b64e 100644 --- a/src/src_auth_perms_sync/permissions/queries.py +++ b/src/src_auth_perms_sync/permissions/queries.py @@ -159,11 +159,20 @@ } """ +# Inlining org memberships into user hydration saves a separate per-user +# lookup when scoped org sync needs them. +USER_ORGANIZATIONS_FIELDS = """ +organizations { + nodes { id name } +} +""" + def user_fields( *, include_emails: bool = False, include_account_data: bool = True, + include_organizations: bool = False, ) -> str: """Return user fields, adding heavier fields only when downstream needs them.""" fields = USER_BASE_FIELDS.replace( @@ -171,7 +180,9 @@ def user_fields( USER_ACCOUNT_DATA_FIELD if include_account_data else "", ) if include_emails: - return f"{fields}\n{USER_EMAIL_FIELDS}" + fields = f"{fields}\n{USER_EMAIL_FIELDS}" + if include_organizations: + fields = f"{fields}\n{USER_ORGANIZATIONS_FIELDS}" return fields @@ -179,11 +190,17 @@ def query_user_by_username( *, include_emails: bool = False, include_account_data: bool = True, + include_organizations: bool = False, ) -> str: + fields = user_fields( + include_emails=include_emails, + include_account_data=include_account_data, + include_organizations=include_organizations, + ) return f""" query UserByUsername($username: String!) {{ user(username: $username) {{ - {user_fields(include_emails=include_emails, include_account_data=include_account_data)} + {fields} }} }} """ @@ -193,11 +210,17 @@ def query_user_by_email( *, include_emails: bool = False, include_account_data: bool = True, + include_organizations: bool = False, ) -> str: + fields = user_fields( + include_emails=include_emails, + include_account_data=include_account_data, + include_organizations=include_organizations, + ) return f""" query UserByEmail($email: String!) {{ user(email: $email) {{ - {user_fields(include_emails=include_emails, include_account_data=include_account_data)} + {fields} }} }} """ @@ -207,12 +230,18 @@ def query_user_by_id( *, include_emails: bool = False, include_account_data: bool = True, + include_organizations: bool = False, ) -> str: + fields = user_fields( + include_emails=include_emails, + include_account_data=include_account_data, + include_organizations=include_organizations, + ) return f""" query UserByID($id: ID!) {{ node(id: $id) {{ ... on User {{ - {user_fields(include_emails=include_emails, include_account_data=include_account_data)} + {fields} }} }} }} @@ -224,6 +253,7 @@ def users_by_ids_batch_query( *, include_emails: bool = False, include_account_data: bool = True, + include_organizations: bool = False, ) -> str: """Hydrate many users in one request via aliased `node()` lookups. @@ -231,7 +261,11 @@ def users_by_ids_batch_query( dominates user hydration, so batching cuts request count by the batch size with the same per-user fields. """ - fields = user_fields(include_emails=include_emails, include_account_data=include_account_data) + fields = user_fields( + include_emails=include_emails, + include_account_data=include_account_data, + include_organizations=include_organizations, + ) variables = ", ".join(f"$user{index}: ID!" for index in range(batch_size)) aliases = "".join( f""" diff --git a/src/src_auth_perms_sync/permissions/sourcegraph.py b/src/src_auth_perms_sync/permissions/sourcegraph.py index c96bb51..fa7a9d4 100644 --- a/src/src_auth_perms_sync/permissions/sourcegraph.py +++ b/src/src_auth_perms_sync/permissions/sourcegraph.py @@ -153,6 +153,7 @@ def get_user_by_username( *, include_emails: bool = False, include_account_data: bool = True, + include_organizations: bool = False, ) -> shared_types.User | None: """Return the exact Sourcegraph user for `username`, if it exists.""" data = cast( @@ -161,6 +162,7 @@ def get_user_by_username( queries.query_user_by_username( include_emails=include_emails, include_account_data=include_account_data, + include_organizations=include_organizations, ), cast(src.JSONDict, {"username": username}), ), @@ -174,6 +176,7 @@ def get_user_by_email( *, include_emails: bool = False, include_account_data: bool = True, + include_organizations: bool = False, ) -> shared_types.User | None: """Return the user owning the verified email address, if it exists.""" data = cast( @@ -182,6 +185,7 @@ def get_user_by_email( queries.query_user_by_email( include_emails=include_emails, include_account_data=include_account_data, + include_organizations=include_organizations, ), cast(src.JSONDict, {"email": email}), ), @@ -216,6 +220,7 @@ def get_users_by_ids( *, include_emails: bool = False, include_account_data: bool = True, + include_organizations: bool = False, parallelism: int = 1, worker_pool: ThreadPoolExecutor | None = None, progress_label: str | None = None, @@ -232,6 +237,7 @@ def fetch_batch(batch: Sequence[str]) -> list[shared_types.User | None]: len(batch), include_emails=include_emails, include_account_data=include_account_data, + include_organizations=include_organizations, ), cast(src.JSONDict, {f"user{index}": user_id for index, user_id in enumerate(batch)}), follow_pages=False, diff --git a/src/src_auth_perms_sync/shared/queries.py b/src/src_auth_perms_sync/shared/queries.py index 0a8126d..821aa4f 100644 --- a/src/src_auth_perms_sync/shared/queries.py +++ b/src/src_auth_perms_sync/shared/queries.py @@ -53,15 +53,24 @@ accountData """ +# Inlining the user's org memberships into the same user query saves a +# separate per-user lookup when scoped org sync needs them. +USER_ORGANIZATIONS_FIELD = """ organizations { + nodes { id name } + } +""" + def query_users( *, include_emails: bool = False, include_account_data: bool = True, + include_organizations: bool = False, ) -> str: """Return the users page query, adding heavier fields only when requested.""" email_fields = USER_EMAIL_FIELDS if include_emails else "" account_data_field = USER_ACCOUNT_DATA_FIELD if include_account_data else "" + organizations_field = USER_ORGANIZATIONS_FIELD if include_organizations else "" return f""" query ListUsers($first: Int!, $after: String) {{ users(first: $first, after: $after) {{ @@ -69,7 +78,7 @@ def query_users( id username builtinAuth -{email_fields} externalAccounts(first: 50) {{ +{email_fields}{organizations_field} externalAccounts(first: 50) {{ nodes {{ serviceType serviceID diff --git a/src/src_auth_perms_sync/shared/run_context.py b/src/src_auth_perms_sync/shared/run_context.py index 1364358..127d16c 100644 --- a/src/src_auth_perms_sync/shared/run_context.py +++ b/src/src_auth_perms_sync/shared/run_context.py @@ -26,10 +26,18 @@ class CommandData: `auth_provider_views` and `code_host_views` carry the same dicts the get command writes to `auth-providers.yaml` and `code-hosts.yaml`, so module callers receive discovery data without re-parsing files. + + `saml_group_users` carries the complete user population (full set + modes); a subsequent org sync reuses it for a full sync. Additive set + modes instead carry their selected user subset in + `scoped_saml_group_users`; a subsequent org sync then runs scoped to + exactly those users (per-user additions and removals, nobody else + touched) without streaming all users again. """ auth_providers: list[shared_types.AuthProvider] | None = None saml_group_users: list[shared_types.SamlGroupUser] | None = None + scoped_saml_group_users: list[shared_types.ScopedSamlGroupUser] | None = None auth_provider_views: list[dict[str, Any]] | None = None code_host_views: list[dict[str, Any]] | None = None maps_created: bool = False diff --git a/src/src_auth_perms_sync/shared/saml_groups.py b/src/src_auth_perms_sync/shared/saml_groups.py index 1dcc1dd..f0d9275 100644 --- a/src/src_auth_perms_sync/shared/saml_groups.py +++ b/src/src_auth_perms_sync/shared/saml_groups.py @@ -22,6 +22,7 @@ from __future__ import annotations +import re from collections.abc import Iterable from typing import Any, cast @@ -30,6 +31,48 @@ DEFAULT_GROUPS_ATTRIBUTE_NAME: str = "groups" SAML_SERVICE_TYPE: str = "saml" +# Every organization this tool creates gets this name prefix. No human +# would name an org `synced-...`, so the prefix doubles as the ownership +# marker: an org is tool-managed if and only if its name starts with it. +SYNCED_ORGANIZATION_NAME_PREFIX: str = "synced-" +ORGANIZATION_NAME_MAX_LENGTH: int = 255 + +_ORGANIZATION_NAME_PART_RE = re.compile(r"[^A-Za-z0-9]+") +_ORGANIZATION_NAME_DASH_RUN_RE = re.compile(r"-+") + + +def organization_name_for_saml_group(provider_config_id: str, group_name: str) -> str: + """Return the deterministic synced org name for one SAML group. + + Shape: `synced--`. + """ + provider_part = _organization_name_part(provider_config_id, "auth provider configID") + group_part = _organization_name_part(group_name, "SAML group name") + organization_name = f"{SYNCED_ORGANIZATION_NAME_PREFIX}{provider_part}-{group_part}" + if len(organization_name) > ORGANIZATION_NAME_MAX_LENGTH: + raise SystemExit( + f"FATAL: generated org name for configID={provider_config_id!r} " + f"group={group_name!r} is {len(organization_name)} characters; " + f"Sourcegraph org names must be <= {ORGANIZATION_NAME_MAX_LENGTH}." + ) + return organization_name + + +def _organization_name_part(value: str, label: str) -> str: + normalized = _ORGANIZATION_NAME_PART_RE.sub("-", value.strip()) + normalized = _ORGANIZATION_NAME_DASH_RUN_RE.sub("-", normalized).strip("-") + if not normalized: + raise SystemExit( + f"FATAL: {label} {value!r} cannot be converted to a Sourcegraph org-name part." + ) + return normalized + + +def is_synced_organization_name(organization_name: str) -> bool: + """Return whether an org name marks the org as managed by this tool.""" + return organization_name.startswith(SYNCED_ORGANIZATION_NAME_PREFIX) + + # Per-(serviceID, clientID) override of the SAML groups attribute name. # `None` or a missing key means "use DEFAULT_GROUPS_ATTRIBUTE_NAME for # this provider". Built by `attribute_names_by_provider_key()` from the @@ -150,12 +193,12 @@ def _dict_items(value: Any) -> list[dict[str, Any]]: return [cast(dict[str, Any], item) for item in items if isinstance(item, dict)] -def compact_saml_group_user( +def saml_group_memberships_for_user( user: shared_types.User, providers_by_account_key: dict[tuple[str, str], shared_types.AuthProvider], attribute_names_by_provider: SamlGroupsAttributeNameByProvider, -) -> shared_types.SamlGroupUser | None: - """Return only the user fields org sync needs from one full user row.""" +) -> tuple[shared_types.SamlGroupMembership, ...]: + """Extract one user's distinct (configID, group) SAML memberships.""" memberships: list[shared_types.SamlGroupMembership] = [] seen: set[tuple[str, str]] = set() for account in user["externalAccounts"]["nodes"]: @@ -180,12 +223,24 @@ def compact_saml_group_user( ) ) seen.add(membership_key) + return tuple(memberships) + + +def compact_saml_group_user( + user: shared_types.User, + providers_by_account_key: dict[tuple[str, str], shared_types.AuthProvider], + attribute_names_by_provider: SamlGroupsAttributeNameByProvider, +) -> shared_types.SamlGroupUser | None: + """Return only the user fields org sync needs from one full user row.""" + memberships = saml_group_memberships_for_user( + user, providers_by_account_key, attribute_names_by_provider + ) if not memberships: return None return shared_types.SamlGroupUser( user_id=user["id"], username=user["username"], - saml_group_memberships=tuple(memberships), + saml_group_memberships=memberships, ) @@ -206,6 +261,39 @@ def compact_saml_group_users( return compact_users +def compact_scoped_saml_group_users( + users: Iterable[shared_types.User], + providers: Iterable[shared_types.AuthProvider], + attribute_names_by_provider: SamlGroupsAttributeNameByProvider, +) -> list[shared_types.ScopedSamlGroupUser]: + """Compact in-scope users for a scoped (per-user) organization sync. + + Every user is kept — even with zero SAML group memberships — because + scoped org sync also removes users from synced orgs they left. Each + user's row must have been fetched with `include_organizations=True`; + only `synced-` prefixed org memberships are retained. + """ + providers_by_account_key = saml_providers_by_account_key(providers) + scoped_users: list[shared_types.ScopedSamlGroupUser] = [] + for user in users: + synced_organizations = tuple( + organization + for organization in user.get("organizations", {"nodes": []})["nodes"] + if is_synced_organization_name(organization["name"]) + ) + scoped_users.append( + shared_types.ScopedSamlGroupUser( + user_id=user["id"], + username=user["username"], + saml_group_memberships=saml_group_memberships_for_user( + user, providers_by_account_key, attribute_names_by_provider + ), + synced_organizations=synced_organizations, + ) + ) + return scoped_users + + def count_users_per_saml_group( users: Iterable[shared_types.User], attribute_names_by_provider: SamlGroupsAttributeNameByProvider | None = None, diff --git a/src/src_auth_perms_sync/shared/sourcegraph.py b/src/src_auth_perms_sync/shared/sourcegraph.py index e0ba123..969a66f 100644 --- a/src/src_auth_perms_sync/shared/sourcegraph.py +++ b/src/src_auth_perms_sync/shared/sourcegraph.py @@ -37,6 +37,7 @@ def list_users_with_accounts( *, include_emails: bool = False, include_account_data: bool = True, + include_organizations: bool = False, ) -> list[shared_types.User]: return [ cast(shared_types.User, node) @@ -44,6 +45,7 @@ def list_users_with_accounts( queries.query_users( include_emails=include_emails, include_account_data=include_account_data, + include_organizations=include_organizations, ), connection_path=("users",), page_size=DEFAULT_PAGE_SIZE, @@ -57,6 +59,7 @@ def list_users_streaming( *, include_emails: bool = False, include_account_data: bool = True, + include_organizations: bool = False, ) -> Iterator[shared_types.User]: """Stream ListUsers pages one at a time, yielding each User as it arrives. @@ -73,6 +76,7 @@ def list_users_streaming( queries.query_users( include_emails=include_emails, include_account_data=include_account_data, + include_organizations=include_organizations, ), connection_path=("users",), page_size=DEFAULT_PAGE_SIZE, diff --git a/src/src_auth_perms_sync/shared/types.py b/src/src_auth_perms_sync/shared/types.py index 46f2032..20c1df0 100644 --- a/src/src_auth_perms_sync/shared/types.py +++ b/src/src_auth_perms_sync/shared/types.py @@ -35,12 +35,22 @@ class UserEmail(TypedDict): verified: bool +class OrganizationReference(TypedDict): + id: str + name: str + + +class OrganizationReferenceConnection(TypedDict): + nodes: list[OrganizationReference] + + class User(TypedDict): id: str username: str builtinAuth: bool externalAccounts: ExternalAccountConnection emails: NotRequired[list[UserEmail]] + organizations: NotRequired[OrganizationReferenceConnection] @dataclass(frozen=True, slots=True) @@ -68,6 +78,20 @@ class SamlGroupUser(UserIdentity): saml_group_memberships: tuple[SamlGroupMembership, ...] +@dataclass(frozen=True, slots=True) +class ScopedSamlGroupUser(UserIdentity): + """One in-scope user for a scoped (per-user) SAML organization sync. + + Unlike `SamlGroupUser`, users with zero group memberships are kept: + scoped org sync must still remove them from synced orgs they no + longer belong to. `synced_organizations` carries the user's current + memberships in tool-managed (`synced-` prefixed) organizations. + """ + + saml_group_memberships: tuple[SamlGroupMembership, ...] + synced_organizations: tuple[OrganizationReference, ...] + + class SiteUserCandidate(TypedDict): id: str username: str diff --git a/tests/e2e/case_runner.py b/tests/e2e/case_runner.py index 85362d9..9158068 100644 --- a/tests/e2e/case_runner.py +++ b/tests/e2e/case_runner.py @@ -88,12 +88,22 @@ class FixtureRepo(TypedDict): createdAt: NotRequired[str] # default: 2026-01-01T00:00:00Z +class FixtureOrganization(TypedDict): + id: int + name: str + members: list[str] # usernames + + class FixtureState(TypedDict): endpoint: str authProviders: list[shared_types.AuthProvider] externalServices: list[FixtureExternalService] users: list[FixtureUser] repos: list[FixtureRepo] + organizations: NotRequired[list[FixtureOrganization]] # default: [] + # The user behind the fixture token (createOrganization auto-adds it). + # Default: the first user. + currentUsername: NotRequired[str] class FixtureCase(TypedDict): @@ -187,6 +197,18 @@ def __init__(self, state: FixtureState) -> None: self._external_services = list(state["externalServices"]) self._users = list(state["users"]) self._repos = list(state["repos"]) + self._organizations: list[FixtureOrganization] = [ + { + "id": organization["id"], + "name": organization["name"], + "members": list(organization["members"]), + } + for organization in state.get("organizations", []) + ] + self._organization_members_by_id: dict[int, set[str]] = { + organization["id"]: set(organization["members"]) for organization in self._organizations + } + self._current_username = state.get("currentUsername") or self._users[0]["username"] self._mutation_count = 0 self._users_by_graphql_id = { @@ -269,6 +291,42 @@ def graphql( return self._repository_names_by_id(variable_values) if "query PendingBindIDs" in query: return {"usersWithPendingPermissions": self._pending_bind_ids()} + if "query SamlOrganizationSyncCurrentUser" in query: + return {"currentUser": self._graphql_current_user()} + if "query SyncedOrganizations" in query: + return { + "currentUser": self._graphql_current_user(), + "organizations": self._search_organizations( + variable_values["query"], variable_values["first"] + ), + } + if "query SamlOrganizationLookup" in query: + lookup_data: dict[str, Any] = {"currentUser": self._graphql_current_user()} + index = 0 + while f"name{index}" in variable_values: + lookup_data[f"organization{index}"] = self._search_organizations( + variable_values[f"name{index}"], + variable_values["organizationFirst"], + ) + index += 1 + return lookup_data + if "query UsersOrganizationsBatch" in query: + organizations_data: dict[str, Any] = {} + index = 0 + while f"user{index}" in variable_values: + organizations_data[f"user{index}"] = self._graphql_user_by_id( + variable_values[f"user{index}"] + ) + index += 1 + return organizations_data + if "mutation CreateOrganization" in query: + return {"createOrganization": self._create_organization(variable_values["name"])} + if "mutation AddUserToOrganization" in query: + self._add_user_to_organization(variable_values) + return {"addUserToOrganization": {"alwaysNil": None}} + if "mutation RemoveUserFromOrganization" in query: + self._remove_user_from_organization(variable_values) + return {"removeUserFromOrganization": {"alwaysNil": None}} if "mutation SetRepoPerms" in query: self._set_repo_permissions(variable_values) return {"setRepositoryPermissionsForUsers": {"alwaysNil": None}} @@ -307,6 +365,8 @@ def stream_connection_nodes( return iter(self._explicit_repository_nodes_for_user(variable_values["id"])) if path == ("authorizedUserRepositories",): return iter(self._authorized_user_repositories(variable_values["bindID"])) + if path == ("node", "members"): + return iter(self._organization_member_nodes(variable_values["id"])) raise AssertionError(f"Unhandled fixture connection path: {path}") def export_state(self) -> FixtureState: @@ -331,6 +391,17 @@ def export_state(self) -> FixtureState: "externalServices": self._external_services, "users": self._users, "repos": repos, + "organizations": [ + { + "id": organization["id"], + "name": organization["name"], + "members": sorted(self._organization_members_by_id[organization["id"]]), + } + for organization in sorted( + self._organizations, key=lambda organization: organization["name"] + ) + ], + "currentUsername": self._current_username, } def _graphql_user_by_username(self, username_value: object) -> dict[str, Any] | None: @@ -363,6 +434,13 @@ def _graphql_user(self, user: FixtureUser) -> dict[str, Any]: "builtinAuth": user["builtinAuth"], "emails": list(user["emails"]), "externalAccounts": {"nodes": list(user["externalAccounts"])}, + "organizations": { + "nodes": [ + self._graphql_organization_reference(organization) + for organization in self._organizations + if user["username"] in self._organization_members_by_id[organization["id"]] + ] + }, } def _graphql_external_services(self) -> list[dict[str, Any]]: @@ -564,6 +642,99 @@ def _require_graphql_repository(self, repository: FixtureRepo) -> dict[str, Any] assert graphql_repository is not None return graphql_repository + def _graphql_current_user(self) -> dict[str, Any]: + user = self._users_by_username[self._current_username] + return {"id": self._user_graphql_id(user["id"]), "username": user["username"]} + + def _graphql_organization_reference(self, organization: FixtureOrganization) -> dict[str, Any]: + return { + "id": self._organization_graphql_id(organization["id"]), + "name": organization["name"], + } + + def _search_organizations(self, query_value: object, first_value: object) -> dict[str, Any]: + """Substring search over org names, like the real organizations(query:).""" + if not isinstance(query_value, str): + raise AssertionError("organizations query variable must be a string") + if not isinstance(first_value, int): + raise AssertionError("organizations first variable must be an integer") + matches = [ + organization + for organization in self._organizations + if query_value in organization["name"] + ] + return { + "totalCount": len(matches), + "nodes": [ + self._graphql_organization_reference(organization) + for organization in matches[:first_value] + ], + } + + def _organization_member_nodes(self, organization_id_value: object) -> list[dict[str, Any]]: + organization = self._organization_by_graphql_id(organization_id_value) + return [ + self._graphql_current_user_like(username) + for username in sorted(self._organization_members_by_id[organization["id"]]) + ] + + def _graphql_current_user_like(self, username: str) -> dict[str, Any]: + user = self._users_by_username[username] + return {"id": self._user_graphql_id(user["id"]), "username": username} + + def _create_organization(self, name_value: object) -> dict[str, Any]: + if not isinstance(name_value, str): + raise AssertionError("organization name variable must be a string") + if any(organization["name"] == name_value for organization in self._organizations): + raise src.GraphQLError( + f"createOrganization: organization name is already taken: {name_value}" + ) + organization_id = ( + max((organization["id"] for organization in self._organizations), default=0) + 1 + ) + organization: FixtureOrganization = { + "id": organization_id, + "name": name_value, + "members": [], + } + self._organizations.append(organization) + # createOrganization adds the caller as the first member. + self._organization_members_by_id[organization_id] = {self._current_username} + self._mutation_count += 1 + return self._graphql_organization_reference(organization) + + def _add_user_to_organization(self, variables: dict[str, object]) -> None: + organization = self._organization_by_graphql_id(variables["organization"]) + username_value = variables["username"] + if not isinstance(username_value, str): + raise AssertionError("username variable must be a string") + if username_value not in self._users_by_username: + raise src.GraphQLError(f"addUserToOrganization: no such user: {username_value}") + members = self._organization_members_by_id[organization["id"]] + if username_value in members: + raise src.GraphQLError( + "addUserToOrganization: user is already a member of the organization" + ) + members.add(username_value) + self._mutation_count += 1 + + def _remove_user_from_organization(self, variables: dict[str, object]) -> None: + organization = self._organization_by_graphql_id(variables["organization"]) + username = self._username_from_user_graphql_id(variables["user"]) + self._organization_members_by_id[organization["id"]].discard(username) + self._mutation_count += 1 + + def _organization_by_graphql_id(self, organization_id_value: object) -> FixtureOrganization: + if not isinstance(organization_id_value, str): + raise AssertionError("organization variable must be a string") + for organization in self._organizations: + if self._organization_graphql_id(organization["id"]) == organization_id_value: + return organization + raise AssertionError(f"unknown organization GraphQL ID: {organization_id_value}") + + def _organization_graphql_id(self, organization_id: int) -> str: + return src.encode_sourcegraph_node_id("Org", organization_id) + def _add_repo_permission(self, variables: dict[str, object]) -> None: repository_id = self._repository_integer_id(variables["repo"]) username = self._username_from_user_graphql_id(variables["user"]) @@ -664,7 +835,13 @@ def case_cli_arguments(case: FixtureCase, case_name: str) -> list[str]: argv = shlex.split(cli_command) else: flags = cli_flags_by_field_name() - argv = [cast(str, args["command"])] + # args carry the CommandName (e.g. sync_saml_orgs); the argv needs + # the CLI argument spelling (sync-saml-orgs). + argument_names = { + command.command_name: command.argument_name for command in cli.CLI_COMMANDS + } + command_name = cast(str, args["command"]) + argv = [argument_names.get(command_name, command_name)] for field_name, value in args.items(): if field_name == "command" or value is None or value is False: continue diff --git a/tests/e2e/fixtures/set-users-sync-saml-orgs-noop/before.json b/tests/e2e/fixtures/set-users-sync-saml-orgs-noop/before.json new file mode 100644 index 0000000..4daeaf4 --- /dev/null +++ b/tests/e2e/fixtures/set-users-sync-saml-orgs-noop/before.json @@ -0,0 +1,108 @@ +{ + "endpoint": "https://fixture.sourcegraph.test", + "currentUsername": "org_admin_09990", + "authProviders": [ + { + "serviceType": "builtin", + "serviceID": "", + "clientID": "", + "displayName": "Builtin username/password", + "isBuiltin": true, + "configID": "" + }, + { + "serviceType": "saml", + "serviceID": "http://www.okta.com/test123", + "clientID": "https://sourcegraph.test/.auth/saml/metadata", + "displayName": "Okta", + "isBuiltin": false, + "configID": "okta" + } + ], + "externalServices": [ + { + "id": 1, + "kind": "GITHUB", + "displayName": "GitHub", + "url": "https://github.com/", + "config": "{}" + } + ], + "users": [ + { + "id": 1, + "username": "org_admin_09990", + "builtinAuth": true, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "org_admin_09990@perms-sync.test", + "verified": true + } + ], + "externalAccounts": [] + }, + { + "id": 2, + "username": "test_user_09991", + "builtinAuth": false, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "test_user_09991@perms-sync.test", + "verified": true + } + ], + "externalAccounts": [ + { + "serviceType": "saml", + "serviceID": "http://www.okta.com/test123", + "clientID": "https://sourcegraph.test/.auth/saml/metadata", + "accountData": { + "Values": { + "groups": { + "Values": [ + { + "Value": "engineering" + } + ] + } + } + } + } + ] + }, + { + "id": 3, + "username": "test_user_09992", + "builtinAuth": true, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "test_user_09992@perms-sync.test", + "verified": true + } + ], + "externalAccounts": [] + } + ], + "repos": [ + { + "id": 101, + "name": "test-repo-49981", + "externalServiceID": 1, + "explicitPermissionsUsers": [ + "test_user_09991" + ] + } + ], + "organizations": [ + { + "id": 1, + "name": "synced-okta-engineering", + "members": [ + "test_user_09991" + ] + } + ] +} diff --git a/tests/e2e/fixtures/set-users-sync-saml-orgs-noop/maps.yaml b/tests/e2e/fixtures/set-users-sync-saml-orgs-noop/maps.yaml new file mode 100644 index 0000000..fc31cb1 --- /dev/null +++ b/tests/e2e/fixtures/set-users-sync-saml-orgs-noop/maps.yaml @@ -0,0 +1,8 @@ +maps: + - name: Grant the app repo to the selected user + users: + usernames: + - test_user_09991 + repos: + names: + - test-repo-49981 diff --git a/tests/e2e/fixtures/set-users-sync-saml-orgs-scoped/after.json b/tests/e2e/fixtures/set-users-sync-saml-orgs-scoped/after.json new file mode 100644 index 0000000..4ebbac7 --- /dev/null +++ b/tests/e2e/fixtures/set-users-sync-saml-orgs-scoped/after.json @@ -0,0 +1,109 @@ +{ + "endpoint": "https://fixture.sourcegraph.test", + "currentUsername": "org_admin_09990", + "authProviders": [ + { + "serviceType": "builtin", + "serviceID": "", + "clientID": "", + "displayName": "Builtin username/password", + "isBuiltin": true, + "configID": "" + }, + { + "serviceType": "saml", + "serviceID": "http://www.okta.com/test123", + "clientID": "https://sourcegraph.test/.auth/saml/metadata", + "displayName": "Okta", + "isBuiltin": false, + "configID": "okta" + } + ], + "externalServices": [ + { + "id": 1, + "kind": "GITHUB", + "displayName": "GitHub", + "url": "https://github.com/", + "config": "{}" + } + ], + "users": [ + { + "id": 1, + "username": "org_admin_09990", + "builtinAuth": true, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "org_admin_09990@perms-sync.test", + "verified": true + } + ], + "externalAccounts": [] + }, + { + "id": 2, + "username": "test_user_09991", + "builtinAuth": false, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "test_user_09991@perms-sync.test", + "verified": true + } + ], + "externalAccounts": [ + { + "serviceType": "saml", + "serviceID": "http://www.okta.com/test123", + "clientID": "https://sourcegraph.test/.auth/saml/metadata", + "accountData": { + "Values": { + "groups": { + "Values": [ + { + "Value": "engineering" + } + ] + } + } + } + } + ] + }, + { + "id": 3, + "username": "test_user_09992", + "builtinAuth": true, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "test_user_09992@perms-sync.test", + "verified": true + } + ], + "externalAccounts": [] + } + ], + "repos": [ + { + "id": 101, + "name": "test-repo-49981", + "externalServiceID": 1, + "explicitPermissionsUsers": ["test_user_09991"] + } + ], + "organizations": [ + { + "id": 2, + "name": "synced-okta-engineering", + "members": ["test_user_09991"] + }, + { + "id": 1, + "name": "synced-okta-stale", + "members": ["test_user_09992"] + } + ] +} diff --git a/tests/e2e/fixtures/set-users-sync-saml-orgs-scoped/before.json b/tests/e2e/fixtures/set-users-sync-saml-orgs-scoped/before.json new file mode 100644 index 0000000..9f17136 --- /dev/null +++ b/tests/e2e/fixtures/set-users-sync-saml-orgs-scoped/before.json @@ -0,0 +1,104 @@ +{ + "endpoint": "https://fixture.sourcegraph.test", + "currentUsername": "org_admin_09990", + "authProviders": [ + { + "serviceType": "builtin", + "serviceID": "", + "clientID": "", + "displayName": "Builtin username/password", + "isBuiltin": true, + "configID": "" + }, + { + "serviceType": "saml", + "serviceID": "http://www.okta.com/test123", + "clientID": "https://sourcegraph.test/.auth/saml/metadata", + "displayName": "Okta", + "isBuiltin": false, + "configID": "okta" + } + ], + "externalServices": [ + { + "id": 1, + "kind": "GITHUB", + "displayName": "GitHub", + "url": "https://github.com/", + "config": "{}" + } + ], + "users": [ + { + "id": 1, + "username": "org_admin_09990", + "builtinAuth": true, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "org_admin_09990@perms-sync.test", + "verified": true + } + ], + "externalAccounts": [] + }, + { + "id": 2, + "username": "test_user_09991", + "builtinAuth": false, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "test_user_09991@perms-sync.test", + "verified": true + } + ], + "externalAccounts": [ + { + "serviceType": "saml", + "serviceID": "http://www.okta.com/test123", + "clientID": "https://sourcegraph.test/.auth/saml/metadata", + "accountData": { + "Values": { + "groups": { + "Values": [ + { + "Value": "engineering" + } + ] + } + } + } + } + ] + }, + { + "id": 3, + "username": "test_user_09992", + "builtinAuth": true, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "test_user_09992@perms-sync.test", + "verified": true + } + ], + "externalAccounts": [] + } + ], + "repos": [ + { + "id": 101, + "name": "test-repo-49981", + "externalServiceID": 1, + "explicitPermissionsUsers": [] + } + ], + "organizations": [ + { + "id": 1, + "name": "synced-okta-stale", + "members": ["test_user_09991", "test_user_09992"] + } + ] +} diff --git a/tests/e2e/fixtures/set-users-sync-saml-orgs-scoped/maps.yaml b/tests/e2e/fixtures/set-users-sync-saml-orgs-scoped/maps.yaml new file mode 100644 index 0000000..fc31cb1 --- /dev/null +++ b/tests/e2e/fixtures/set-users-sync-saml-orgs-scoped/maps.yaml @@ -0,0 +1,8 @@ +maps: + - name: Grant the app repo to the selected user + users: + usernames: + - test_user_09991 + repos: + names: + - test-repo-49981 diff --git a/tests/e2e/fixtures/sync-saml-orgs-full-cleanup/after.json b/tests/e2e/fixtures/sync-saml-orgs-full-cleanup/after.json new file mode 100644 index 0000000..7c5b5b2 --- /dev/null +++ b/tests/e2e/fixtures/sync-saml-orgs-full-cleanup/after.json @@ -0,0 +1,111 @@ +{ + "endpoint": "https://fixture.sourcegraph.test", + "currentUsername": "org_admin_09990", + "authProviders": [ + { + "serviceType": "builtin", + "serviceID": "", + "clientID": "", + "displayName": "Builtin username/password", + "isBuiltin": true, + "configID": "" + }, + { + "serviceType": "saml", + "serviceID": "http://www.okta.com/test123", + "clientID": "https://sourcegraph.test/.auth/saml/metadata", + "displayName": "Okta", + "isBuiltin": false, + "configID": "okta" + } + ], + "externalServices": [ + { + "id": 1, + "kind": "GITHUB", + "displayName": "GitHub", + "url": "https://github.com/", + "config": "{}" + } + ], + "users": [ + { + "id": 1, + "username": "org_admin_09990", + "builtinAuth": true, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "org_admin_09990@perms-sync.test", + "verified": true + } + ], + "externalAccounts": [] + }, + { + "id": 2, + "username": "test_user_09991", + "builtinAuth": false, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "test_user_09991@perms-sync.test", + "verified": true + } + ], + "externalAccounts": [ + { + "serviceType": "saml", + "serviceID": "http://www.okta.com/test123", + "clientID": "https://sourcegraph.test/.auth/saml/metadata", + "accountData": { + "Values": { + "groups": { + "Values": [ + { + "Value": "engineering" + } + ] + } + } + } + } + ] + }, + { + "id": 3, + "username": "test_user_09992", + "builtinAuth": true, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "test_user_09992@perms-sync.test", + "verified": true + } + ], + "externalAccounts": [] + } + ], + "repos": [ + { + "id": 101, + "name": "test-repo-49981", + "externalServiceID": 1, + "explicitPermissionsUsers": [] + } + ], + "organizations": [ + { + "id": 2, + "name": "synced-okta-engineering", + "members": [ + "test_user_09991" + ] + }, + { + "id": 1, + "name": "synced-okta-stale", + "members": [] + } + ] +} diff --git a/tests/e2e/fixtures/sync-saml-orgs-full-cleanup/before.json b/tests/e2e/fixtures/sync-saml-orgs-full-cleanup/before.json new file mode 100644 index 0000000..9374140 --- /dev/null +++ b/tests/e2e/fixtures/sync-saml-orgs-full-cleanup/before.json @@ -0,0 +1,106 @@ +{ + "endpoint": "https://fixture.sourcegraph.test", + "currentUsername": "org_admin_09990", + "authProviders": [ + { + "serviceType": "builtin", + "serviceID": "", + "clientID": "", + "displayName": "Builtin username/password", + "isBuiltin": true, + "configID": "" + }, + { + "serviceType": "saml", + "serviceID": "http://www.okta.com/test123", + "clientID": "https://sourcegraph.test/.auth/saml/metadata", + "displayName": "Okta", + "isBuiltin": false, + "configID": "okta" + } + ], + "externalServices": [ + { + "id": 1, + "kind": "GITHUB", + "displayName": "GitHub", + "url": "https://github.com/", + "config": "{}" + } + ], + "users": [ + { + "id": 1, + "username": "org_admin_09990", + "builtinAuth": true, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "org_admin_09990@perms-sync.test", + "verified": true + } + ], + "externalAccounts": [] + }, + { + "id": 2, + "username": "test_user_09991", + "builtinAuth": false, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "test_user_09991@perms-sync.test", + "verified": true + } + ], + "externalAccounts": [ + { + "serviceType": "saml", + "serviceID": "http://www.okta.com/test123", + "clientID": "https://sourcegraph.test/.auth/saml/metadata", + "accountData": { + "Values": { + "groups": { + "Values": [ + { + "Value": "engineering" + } + ] + } + } + } + } + ] + }, + { + "id": 3, + "username": "test_user_09992", + "builtinAuth": true, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "test_user_09992@perms-sync.test", + "verified": true + } + ], + "externalAccounts": [] + } + ], + "repos": [ + { + "id": 101, + "name": "test-repo-49981", + "externalServiceID": 1, + "explicitPermissionsUsers": [] + } + ], + "organizations": [ + { + "id": 1, + "name": "synced-okta-stale", + "members": [ + "test_user_09992" + ] + } + ] +} diff --git a/tests/e2e/fixtures/sync-saml-orgs-users-scoped/after.json b/tests/e2e/fixtures/sync-saml-orgs-users-scoped/after.json new file mode 100644 index 0000000..62850bc --- /dev/null +++ b/tests/e2e/fixtures/sync-saml-orgs-users-scoped/after.json @@ -0,0 +1,113 @@ +{ + "endpoint": "https://fixture.sourcegraph.test", + "currentUsername": "org_admin_09990", + "authProviders": [ + { + "serviceType": "builtin", + "serviceID": "", + "clientID": "", + "displayName": "Builtin username/password", + "isBuiltin": true, + "configID": "" + }, + { + "serviceType": "saml", + "serviceID": "http://www.okta.com/test123", + "clientID": "https://sourcegraph.test/.auth/saml/metadata", + "displayName": "Okta", + "isBuiltin": false, + "configID": "okta" + } + ], + "externalServices": [ + { + "id": 1, + "kind": "GITHUB", + "displayName": "GitHub", + "url": "https://github.com/", + "config": "{}" + } + ], + "users": [ + { + "id": 1, + "username": "org_admin_09990", + "builtinAuth": true, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "org_admin_09990@perms-sync.test", + "verified": true + } + ], + "externalAccounts": [] + }, + { + "id": 2, + "username": "test_user_09991", + "builtinAuth": false, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "test_user_09991@perms-sync.test", + "verified": true + } + ], + "externalAccounts": [ + { + "serviceType": "saml", + "serviceID": "http://www.okta.com/test123", + "clientID": "https://sourcegraph.test/.auth/saml/metadata", + "accountData": { + "Values": { + "groups": { + "Values": [ + { + "Value": "engineering" + } + ] + } + } + } + } + ] + }, + { + "id": 3, + "username": "test_user_09992", + "builtinAuth": true, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "test_user_09992@perms-sync.test", + "verified": true + } + ], + "externalAccounts": [] + } + ], + "repos": [ + { + "id": 101, + "name": "test-repo-49981", + "externalServiceID": 1, + "explicitPermissionsUsers": [] + } + ], + "organizations": [ + { + "id": 2, + "name": "synced-okta-engineering", + "members": [ + "test_user_09991" + ] + }, + { + "id": 1, + "name": "synced-okta-stale", + "members": [ + "test_user_09992" + ] + } + ] +} diff --git a/tests/e2e/fixtures/sync-saml-orgs-users-scoped/before.json b/tests/e2e/fixtures/sync-saml-orgs-users-scoped/before.json new file mode 100644 index 0000000..7dce83f --- /dev/null +++ b/tests/e2e/fixtures/sync-saml-orgs-users-scoped/before.json @@ -0,0 +1,107 @@ +{ + "endpoint": "https://fixture.sourcegraph.test", + "currentUsername": "org_admin_09990", + "authProviders": [ + { + "serviceType": "builtin", + "serviceID": "", + "clientID": "", + "displayName": "Builtin username/password", + "isBuiltin": true, + "configID": "" + }, + { + "serviceType": "saml", + "serviceID": "http://www.okta.com/test123", + "clientID": "https://sourcegraph.test/.auth/saml/metadata", + "displayName": "Okta", + "isBuiltin": false, + "configID": "okta" + } + ], + "externalServices": [ + { + "id": 1, + "kind": "GITHUB", + "displayName": "GitHub", + "url": "https://github.com/", + "config": "{}" + } + ], + "users": [ + { + "id": 1, + "username": "org_admin_09990", + "builtinAuth": true, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "org_admin_09990@perms-sync.test", + "verified": true + } + ], + "externalAccounts": [] + }, + { + "id": 2, + "username": "test_user_09991", + "builtinAuth": false, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "test_user_09991@perms-sync.test", + "verified": true + } + ], + "externalAccounts": [ + { + "serviceType": "saml", + "serviceID": "http://www.okta.com/test123", + "clientID": "https://sourcegraph.test/.auth/saml/metadata", + "accountData": { + "Values": { + "groups": { + "Values": [ + { + "Value": "engineering" + } + ] + } + } + } + } + ] + }, + { + "id": 3, + "username": "test_user_09992", + "builtinAuth": true, + "createdAt": "2026-01-01T00:00:00Z", + "emails": [ + { + "email": "test_user_09992@perms-sync.test", + "verified": true + } + ], + "externalAccounts": [] + } + ], + "repos": [ + { + "id": 101, + "name": "test-repo-49981", + "externalServiceID": 1, + "explicitPermissionsUsers": [] + } + ], + "organizations": [ + { + "id": 1, + "name": "synced-okta-stale", + "members": [ + "test_user_09991", + "test_user_09992" + ] + } + ] +} diff --git a/tests/run.py b/tests/run.py index 7048380..f12c68c 100644 --- a/tests/run.py +++ b/tests/run.py @@ -1980,7 +1980,7 @@ def run_seeded_org_sync_check(self, environment: dict[str, str]) -> None: self.run_cli_case( CliCase( f"{label} [apply]", - ("sync-saml-orgs", "--apply"), + ("sync-saml-orgs", "--full", "--apply"), 0, (ORGANIZATION_SYNC_VALIDATION_OK,), ), diff --git a/tests/tests.yaml b/tests/tests.yaml index 4898ef0..1b8b72f 100644 --- a/tests/tests.yaml +++ b/tests/tests.yaml @@ -435,7 +435,7 @@ cases: expectedOutput: - "unrecognized arguments: --sync-saml-orgs" - reject-sync-saml-orgs-created-after: + reject-bare-sync-saml-orgs: # scope: # users: 0 # repos: 0 @@ -443,15 +443,35 @@ cases: # cost: # bigO: O(1) # note: parse-only -- no instance reads or writes - description: sync-saml-orgs does not take user filters. + description: >- + sync-saml-orgs requires an explicit mode: --full for a whole-instance + sync, or a user filter for a scoped sync. modes: - local - cliCommand: sync-saml-orgs --created-after 2099-01-01 + cliCommand: sync-saml-orgs expectedExitCode: 2 expectedOutput: - - unrecognized arguments + - >- + sync-saml-orgs requires one of --full, --users, + --users-without-explicit-perms, or --created-after + + reject-sync-saml-orgs-full-and-users: + # scope: + # users: 0 + # repos: 0 + # perms: 0 + # cost: + # bigO: O(1) + # note: parse-only -- no instance reads or writes + description: "--full means all users; combining it with a user filter is contradictory." + modes: + - local + cliCommand: sync-saml-orgs --full --users username_doesnt_exist_01 + expectedExitCode: 2 + expectedOutput: + - "choose at most one of --full, --users" - reject-sync-saml-orgs-users: + reject-sync-saml-orgs-full-and-created-after: # scope: # users: 0 # repos: 0 @@ -459,15 +479,15 @@ cases: # cost: # bigO: O(1) # note: parse-only -- no instance reads or writes - description: sync-saml-orgs does not take a user list. + description: "--full means all users; combining it with --created-after is contradictory." modes: - local - cliCommand: sync-saml-orgs --users username_doesnt_exist_01 + cliCommand: sync-saml-orgs --full --created-after 2099-01-01 expectedExitCode: 2 expectedOutput: - - "unrecognized arguments: --users" + - "--full cannot be combined with --created-after" - reject-sync-saml-orgs-full: + reject-sync-saml-orgs-repos: # scope: # users: 0 # repos: 0 @@ -475,13 +495,13 @@ cases: # cost: # bigO: O(1) # note: parse-only -- no instance reads or writes - description: "--full belongs to set; sync-saml-orgs does not accept it." + description: sync-saml-orgs takes user filters, never repo filters. modes: - local - cliCommand: sync-saml-orgs --full + cliCommand: sync-saml-orgs --repos test-repo-49981 expectedExitCode: 2 expectedOutput: - - "unrecognized arguments: --full" + - "unrecognized arguments: --repos" reject-sync-saml-orgs-restore-path: # scope: @@ -1317,6 +1337,109 @@ cases: no_backup: true expectedMutations: 2 + set-users-sync-saml-orgs-scoped: + # scope: + # users: 3 + # repos: 1 + # perms: 0 + # cost: + # bigO: O(1) + # reads: + # users: 1 + # orgLookups: 1 + # writes: 5 mutations + description: >- + Additive set --users with --sync-saml-orgs runs a SCOPED org sync: + the selected user is added to the synced org their SAML group + requires (created on demand, creator membership cleaned up) and + removed from a synced org their groups no longer justify — while an + out-of-scope member of that same org is left untouched. One repo + grant + create + add + creator removal + stale removal = 5 mutations. + modes: + - local + args: + command: set + users: + - test_user_09991 + sync_saml_organizations: true + apply: true + expectedMutations: 5 + + set-users-sync-saml-orgs-noop: + # scope: + # users: 3 + # repos: 1 + # perms: 1 + # cost: + # bigO: O(1) + # reads: + # users: 1 + # writes: none + description: >- + Scoped org sync is idempotent: with repo grants and synced org + membership already converged, a set --users --sync-saml-orgs apply + issues zero mutations and changes nothing. + modes: + - local + args: + command: set + users: + - test_user_09991 + sync_saml_organizations: true + apply: true + expectedMutations: 0 + + sync-saml-orgs-full-cleanup: + # scope: + # users: 3 + # repos: 1 + # perms: 0 + # cost: + # bigO: O(U) + # reads: + # users: all + # orgMembers: all + # writes: 4 mutations + description: >- + Standalone full org sync converges synced orgs both ways: creates the + org a SAML group requires (cleaning up the creator's auto-membership) + and empties — but never deletes — a synced org whose SAML group + disappeared from every user's assertion. + modes: + - local + args: + command: sync_saml_orgs + full: true + apply: true + expectedMutations: 4 + + sync-saml-orgs-users-scoped: + # scope: + # users: 3 + # repos: 1 + # perms: 0 + # cost: + # bigO: O(1) + # reads: + # users: 1 + # orgLookups: 1 + # writes: 4 mutations + description: >- + Standalone scoped org sync via sync-saml-orgs --users: the selected + user is added to the synced org their SAML group requires (created + on demand, creator membership cleaned up) and removed from a synced + org their groups no longer justify; an out-of-scope member of that + org is untouched and repo permissions never change. create + add + + creator removal + stale removal = 4 mutations. + modes: + - local + args: + command: sync_saml_orgs + users: + - test_user_09991 + apply: true + expectedMutations: 4 + full-overwrite-removes-stale-grant: # scope: # users: 2 @@ -1554,11 +1677,11 @@ cases: # writes: none # note: streams every user's SAML account data description: >- - Standalone organization sync dry run. Also pins the explicit + Standalone full organization sync dry run. Also pins the explicit --env-file flag against its default value. modes: - live - cliCommand: sync-saml-orgs --env-file .env + cliCommand: sync-saml-orgs --full --env-file .env expectedExitCode: 0 expectedOutput: - Dry run complete @@ -1717,7 +1840,7 @@ cases: outcome; it is safe to re-run. modes: - live - cliCommand: sync-saml-orgs --apply + cliCommand: sync-saml-orgs --full --apply expectedExitCode: 0 expectedOutput: - "VALIDATION OK: all target org memberships match" diff --git a/tests/unit/test_cli_config.py b/tests/unit/test_cli_config.py index b581552..118218c 100644 --- a/tests/unit/test_cli_config.py +++ b/tests/unit/test_cli_config.py @@ -357,7 +357,7 @@ def test_validate_config_allows_no_files_without_apply(self) -> None: "restore", make_config(restore_path=Path("snapshot.json"), no_files=True), ) - cli.validate_config("sync_saml_orgs", make_config(no_files=True)) + cli.validate_config("sync_saml_orgs", make_config(full=True, no_files=True)) def test_validate_config_rejects_maps_path_outside_get_and_set(self) -> None: expected_message = "--maps-path requires the get or set command" @@ -397,7 +397,7 @@ def test_artifacts_dir_flag_is_accepted_on_every_command(self) -> None: "get": [], "set": ["--full"], "restore": ["--restore-path", str(snapshot_path)], - "sync-saml-orgs": [], + "sync-saml-orgs": ["--full"], } for command_argument, extra_arguments in extra_arguments_by_command.items(): @@ -440,7 +440,9 @@ def test_validate_config_rejects_restore_path_without_restore(self) -> None: ) def test_validate_config_rejects_set_modes_without_set(self) -> None: - self.assert_config_error("get", make_config(full=True), "requires the set command") + self.assert_config_error( + "get", make_config(full=True), "requires the set or sync-saml-orgs command" + ) def test_validate_config_allows_get_user_filters_without_set(self) -> None: cli.validate_config("get", make_config(users=("alice", "bob@example.com"))) @@ -463,7 +465,7 @@ def test_validate_config_rejects_user_filters_on_non_get_set_commands(self) -> N self.assert_config_error( "restore", make_config(restore_path=Path("snapshot.json"), users=("alice",)), - "require get or set", + "require get, set, or sync-saml-orgs", ) def test_validate_config_rejects_repo_filter_conflicts(self) -> None: @@ -498,6 +500,49 @@ def test_validate_config_rejects_set_without_explicit_mode(self) -> None: "set requires one of --full", ) + def test_validate_config_rejects_sync_saml_orgs_without_explicit_mode(self) -> None: + self.assert_config_error( + "sync_saml_orgs", + make_config(), + "sync-saml-orgs requires one of --full, --users", + ) + + def test_validate_config_rejects_sync_saml_orgs_full_with_user_filters(self) -> None: + self.assert_config_error( + "sync_saml_orgs", + make_config(full=True, users=("alice",)), + "choose at most one of --full, --users", + ) + self.assert_config_error( + "sync_saml_orgs", + make_config(full=True, created_after="2099-01-01"), + "--full cannot be combined with --created-after", + ) + + def test_validate_config_allows_sync_saml_orgs_modes(self) -> None: + cli.validate_config("sync_saml_orgs", make_config(full=True)) + cli.validate_config("sync_saml_orgs", make_config(users=("alice",))) + cli.validate_config("sync_saml_orgs", make_config(users_without_explicit_perms=True)) + cli.validate_config("sync_saml_orgs", make_config(created_after="2026-01-01")) + + def test_resolve_command_names_sync_saml_orgs_artifacts_by_mode(self) -> None: + self.assertEqual( + "sync-saml-orgs-full-dry-run", + cli.resolve_command("sync_saml_orgs", make_config(full=True)).artifact_name, + ) + self.assertEqual( + "sync-saml-orgs-users-apply", + cli.resolve_command( + "sync_saml_orgs", make_config(users=("alice",), apply=True) + ).artifact_name, + ) + self.assertEqual( + "sync-saml-orgs-created-after-dry-run", + cli.resolve_command( + "sync_saml_orgs", make_config(created_after="2026-01-01") + ).artifact_name, + ) + def test_created_after_config_accepts_yyyy_mm_dd_date_arguments(self) -> None: config = load_config_from_env(SRC_AUTH_PERMS_SYNC_CREATED_AFTER="2026-01-01") @@ -733,7 +778,7 @@ def test_cmd_get_no_backup_skips_snapshot_artifacts(self) -> None: with ( mock.patch.object(permissions_command, "load_discovery", return_value=([], [], {})), - mock.patch.object(permissions_command, "_load_get_users", return_value=[]), + mock.patch.object(permissions_command, "load_selected_users", return_value=[]), mock.patch.object(permissions_command.permissions_maps, "dump_code_hosts_yaml"), mock.patch.object(permissions_command.permissions_maps, "dump_auth_providers_yaml"), mock.patch.object( diff --git a/tests/unit/test_command_files.py b/tests/unit/test_command_files.py index 44e278c..91223bc 100644 --- a/tests/unit/test_command_files.py +++ b/tests/unit/test_command_files.py @@ -113,7 +113,7 @@ def run_cmd_get( "load_discovery", return_value=([make_auth_provider()], [make_external_service()], {}), ), - mock.patch.object(permissions_command, "_load_get_users", return_value=[]), + mock.patch.object(permissions_command, "load_selected_users", return_value=[]), mock.patch.object( permissions_command.permission_snapshot, "build_snapshot", diff --git a/tests/unit/test_saml_groups.py b/tests/unit/test_saml_groups.py index 7640785..67932b9 100644 --- a/tests/unit/test_saml_groups.py +++ b/tests/unit/test_saml_groups.py @@ -240,3 +240,89 @@ def test_compact_saml_group_users_keeps_only_org_sync_fields(self) -> None: ], compact_users, ) + + def test_organization_name_for_saml_group_uses_synced_prefix_and_sanitizes(self) -> None: + self.assertEqual( + "synced-okta-eng-team", + saml_groups.organization_name_for_saml_group("okta", "eng team!"), + ) + self.assertTrue( + saml_groups.is_synced_organization_name("synced-okta-eng-team"), + ) + self.assertFalse(saml_groups.is_synced_organization_name("okta-eng-team")) + + def test_organization_name_for_saml_group_rejects_unconvertible_parts(self) -> None: + with self.assertRaises(SystemExit): + saml_groups.organization_name_for_saml_group("okta", "!!!") + + def test_compact_scoped_saml_group_users_keeps_users_without_groups(self) -> None: + providers: list[shared_types.AuthProvider] = [ + { + "serviceType": "saml", + "serviceID": "https://idp.example.com", + "clientID": "sourcegraph", + "displayName": "SAML", + "isBuiltin": False, + "configID": "okta", + }, + ] + users: list[shared_types.User] = [ + { + "id": "user-1", + "username": "alice", + "builtinAuth": False, + "externalAccounts": { + "nodes": [ + { + "serviceType": "saml", + "serviceID": "https://idp.example.com", + "clientID": "sourcegraph", + "accountData": { + "Values": {"groups": {"Values": [{"Value": "engineering"}]}} + }, + }, + ] + }, + "organizations": { + "nodes": [ + {"id": "org-1", "name": "synced-okta-stale"}, + {"id": "org-2", "name": "manually-created-org"}, + ] + }, + }, + { + "id": "user-2", + "username": "bob", + "builtinAuth": True, + "externalAccounts": {"nodes": []}, + "organizations": {"nodes": [{"id": "org-1", "name": "synced-okta-stale"}]}, + }, + ] + + scoped_users = saml_groups.compact_scoped_saml_group_users(users, providers, {}) + + self.assertEqual( + [ + shared_types.ScopedSamlGroupUser( + user_id="user-1", + username="alice", + saml_group_memberships=( + shared_types.SamlGroupMembership( + provider_config_id="okta", group_name="engineering" + ), + ), + # The manually created org is NOT tool-managed: it must + # never appear as a removal candidate. + synced_organizations=({"id": "org-1", "name": "synced-okta-stale"},), + ), + # Users with zero group memberships are kept: scoped org + # sync must still remove them from synced orgs they left. + shared_types.ScopedSamlGroupUser( + user_id="user-2", + username="bob", + saml_group_memberships=(), + synced_organizations=({"id": "org-1", "name": "synced-okta-stale"},), + ), + ], + scoped_users, + )