{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "https://pif-spec.github.io/pif/v0.2/workflow-description.schema.json",
  "title": "PIF WorkflowDescription v0.2",
  "description": "A description of an AI workflow as the input to preflight assessment. PIF (Preflight Interchange Format) v0.2. Cumulative validation: this schema accepts both v0.1 documents (pif_version: \"0.1\") and v0.2 documents (pif_version: \"0.2\"). v0.1 documents that lack v0.2-only fields continue to validate; v0.2 documents must use the new regulatory_domains_self_declared field in place of the deprecated gxp_domains_self_declared.",
  "type": "object",
  "required": [
    "pif_version",
    "workflow_id",
    "intent",
    "ai_role",
    "output_destination",
    "human_gate"
  ],
  "properties": {
    "pif_version": {
      "type": "string",
      "enum": ["0.1", "0.2"],
      "description": "Version of the PIF spec this document conforms to. v0.2 schemas accept v0.1 documents for cumulative validation; v0.2 producers SHOULD emit \"0.2\"."
    },
    "workflow_id": {
      "type": "string",
      "pattern": "^[a-zA-Z0-9_:.-]{4,128}$",
      "description": "Stable identifier for this workflow. Used as the reference in PreflightAssertion.workflow_ref."
    },
    "title": {
      "type": "string",
      "maxLength": 200,
      "description": "Optional short human-readable title."
    },
    "intent": {
      "type": "string",
      "minLength": 10,
      "maxLength": 2000,
      "description": "Free-text description of what the workflow is intended to do, in plain language."
    },
    "intent_tags": {
      "type": "array",
      "items": { "type": "string" },
      "description": "Optional structured tags categorizing the workflow."
    },
    "ai_role": {
      "type": "string",
      "enum": [
        "decision",
        "recommendation",
        "draft",
        "classification",
        "extraction",
        "copilot",
        "summarization"
      ],
      "description": "What the AI is asked to do in the workflow."
    },
    "data_classes": {
      "type": "array",
      "items": {
        "type": "string",
        "enum": [
          "gxp_record",
          "pii",
          "phi",
          "manufacturing_data",
          "clinical_data",
          "regulatory_submission",
          "quality_data",
          "safety_data",
          "supply_chain_data",
          "commercial_data",
          "other"
        ]
      },
      "uniqueItems": true,
      "description": "Categories of data the workflow touches."
    },
    "jurisdictions": {
      "type": "array",
      "items": {
        "type": "string",
        "pattern": "^[A-Z]{2,5}$"
      },
      "uniqueItems": true,
      "description": "ISO 3166 alpha-2 country codes or recognized supranational codes (EU, ICH, WHO, PICS)."
    },
    "output_destination": {
      "type": "string",
      "enum": [
        "advisory",
        "regulated_decision",
        "system_of_record",
        "draft_for_review",
        "automated_action",
        "archive"
      ],
      "description": "Where the AI's output flows after generation."
    },
    "human_gate": {
      "type": "string",
      "enum": ["none", "review", "approve_each", "approve_batch", "post_hoc_audit"],
      "description": "Human checkpoint design."
    },
    "reversibility": {
      "type": "string",
      "enum": ["reversible", "partial", "irreversible"],
      "description": "Can the action triggered by this workflow be reversed if found to be wrong?"
    },
    "lifecycle_stage": {
      "type": "string",
      "enum": ["design", "pilot", "production", "retirement"],
      "description": "Where the workflow sits in its lifecycle."
    },
    "risk_tolerance": {
      "type": "string",
      "enum": ["very_low", "low", "medium", "high"],
      "description": "Stated risk tolerance of the workflow owner."
    },
    "gxp_domains_self_declared": {
      "type": "array",
      "items": {
        "type": "string",
        "enum": [
          "GMP", "GDP", "GCP", "GLP", "GVP",
          "CSV", "data_integrity", "quality_systems",
          "regulatory_affairs", "pharmacovigilance", "labeling",
          "none_claimed"
        ]
      },
      "uniqueItems": true,
      "deprecated": true,
      "description": "DEPRECATED in v0.2: use regulatory_domains_self_declared instead. Retained for cumulative validation of v0.1 documents. Removed in v1.0. v0.2 producers MUST NOT emit this field; v0.2 consumers MUST accept it from v0.1 documents and treat values as equivalent to gxp:-prefixed entries in regulatory_domains_self_declared."
    },
    "regulatory_domains_self_declared": {
      "type": "array",
      "items": { "$ref": "#/$defs/RegulatoryDomainCurie" },
      "uniqueItems": true,
      "description": "Regulatory framework / domain CURIEs the workflow owner believes the workflow touches. Format <namespace>:<token> per the closed namespace list plus open token pattern. Namespace MUST be lowercase; tokens case-preserving and case-sensitive; no whitespace; no multiple colons (use . or _ for hierarchy); US-ASCII only. Consumers preparing canonical forms for signing MUST lexicographically sort the array."
    },
    "context_notes": {
      "type": "string",
      "maxLength": 5000,
      "description": "Free-text additional context: existing controls, integration points, prior assessments, organizational constraints."
    },
    "extensions": {
      "type": "object",
      "description": "Implementation-specific extensions. Field names should be prefixed with the implementing tool (e.g., 'preclari:custom_field'). Implementations MUST NOT reject documents containing unknown extensions."
    }
  },
  "additionalProperties": false,
  "$defs": {
    "RegulatoryDomainCurie": {
      "type": "string",
      "maxLength": 128,
      "description": "A semi-closed CURIE in the format <namespace>:<token>, OR the reserved literal 'none_claimed'. Namespace is one of the closed list (slow-growing via spec PR); token is pattern-validated (open, semantic recognition deferred to the implementation/corpus layer). The 'ext' namespace is reserved for vendor-internal frameworks; convention recommends vendor sub-prefix in the token.",
      "oneOf": [
        { "const": "none_claimed" },
        {
          "type": "string",
          "pattern": "^(gxp|ich|iso|iec|imdrf|nist|eu_mdr|eu_ivdr|eu_ai_act|eu_eba|eu_ecb|eu_dora|eu_csrd|eu_cs3d|eu_eudr|eu_battery_regulation|eu_green_claims|eu_nis2|eu_cra|eu_data_act|eu_dsa|eu_dma|eu_mica|eu_gdpr|eu_sfdr|us_fda_21cfr_803|us_fda_21cfr_820|us_fda_21cfr_11|us_fda_samd|us_hhs_hipaa|us_frb_sr_11_7|uk_mhra|jp_pmda|ch_swissmedic|ca_health_canada|au_tga|mdcg|gamp5|iso27001|iso_42001|iso_23894|iso_14971|iec_62304|nist_sp800_53|ext):[a-zA-Z0-9_.-]+$"
        }
      ]
    }
  }
}
