{
  "_meta": {
    "document": "AI Shockwave Timeline",
    "schemaVersion": "2026.05.02",
    "generatedAt": "2026-05-29T18:26:35.839Z",
    "canonicalUrl": "https://brianletort.ai/industry/shocks",
    "markdownUrl": "https://brianletort.ai/industry/shocks/llm.md",
    "yamlUrl": "https://brianletort.ai/industry/shocks/shocks.yaml",
    "treeUrl": "https://brianletort.ai/industry/tree",
    "lastUpdated": "2026-05-29",
    "dateRange": {
      "start": "2024-02-15",
      "end": "2026-05-28"
    }
  },
  "archetypes": [
    {
      "id": "efficiency",
      "name": "Efficiency shock",
      "signature_metric": "$/GPU-hour, KV cache, tokens/sec, PFLOPS/W",
      "strategic_interpretation": "The winning frontier increasingly comes from servability and cost per useful token, not only benchmark rank.\n",
      "color": "#22d3ee"
    },
    {
      "id": "reasoning",
      "name": "Reasoning shock",
      "signature_metric": "AIME, GPQA, Codeforces, thinking budget",
      "strategic_interpretation": "Reasoning moved from \"harder prompts\" to an explicit computational budget allocated at inference time.\n",
      "color": "#a78bfa"
    },
    {
      "id": "context",
      "name": "Context & interface shock",
      "signature_metric": "context length, retrieval recall, latency, modalities",
      "strategic_interpretation": "Context and real-time interaction became product features and an infrastructure burden at the same time.\n",
      "color": "#f472b6"
    },
    {
      "id": "open_frontier",
      "name": "Open-frontier shock",
      "signature_metric": "params, context, license/distillation terms, ecosystem",
      "strategic_interpretation": "The closed/open separation narrowed enough to change enterprise adoption and price expectations.\n",
      "color": "#34d399"
    },
    {
      "id": "interoperability",
      "name": "Interoperability & network shock",
      "signature_metric": "supported standards/partners, protocol, fabric bandwidth",
      "strategic_interpretation": "The model stopped being the whole system; protocols and fabrics became the strategic choke points.\n",
      "color": "#fbbf24"
    }
  ],
  "layers": [
    {
      "id": "hardware",
      "name": "Hardware",
      "short": "H",
      "shape": "square",
      "color": "#818cf8"
    },
    {
      "id": "software",
      "name": "Software",
      "short": "S",
      "shape": "circle",
      "color": "#34d399"
    },
    {
      "id": "networking",
      "name": "Networking & protocol",
      "short": "N",
      "shape": "hexagon",
      "color": "#fbbf24"
    }
  ],
  "magnitudes": [
    {
      "id": "high",
      "label": "High",
      "description": "Reset a frontier assumption broadly and durably."
    },
    {
      "id": "medium",
      "label": "Medium",
      "description": "Strategically important; deployment effects lag the announcement."
    }
  ],
  "shocks": [
    {
      "id": "gemini-1-5",
      "name": "Gemini 1.5",
      "date": "2024-02-15",
      "vendor": "Google",
      "logo_domain": "deepmind.google",
      "archetype": "context",
      "layers": [
        "software"
      ],
      "magnitude": "high",
      "magnitude_score": 85,
      "magnitude_rationale": "Turned long context from a niche demo into a serious product and infrastructure design axis.\n",
      "confidence": "high",
      "concise_description": "A new MoE architecture with 128K standard context, a 1M-token preview, and report evidence of near-perfect retrieval beyond 10M tokens.\n",
      "assumption_changed": "Long context becomes a production capability, not a demo.",
      "impact": [
        {
          "axis": "capability",
          "label": "Standard context window",
          "before": "32K",
          "after": "1M",
          "delta": "31x",
          "unit": "tokens",
          "direction": "up",
          "headline": true,
          "source_ref": "src-gemini-1-5"
        },
        {
          "axis": "systems",
          "label": "Long-context retrieval recall",
          "after": ">99%",
          "direction": "up",
          "source_ref": "src-gemini-1-5"
        },
        {
          "axis": "capability",
          "label": "Tested retrieval ceiling",
          "after": "10M tokens",
          "direction": "up",
          "source_ref": "src-gemini-1-5"
        }
      ],
      "downstream": {
        "short": "Long-document and long-video workflows become practical.",
        "medium": "RAG assumptions weaken as more state fits in-context.",
        "long": "Context length becomes a pricing and systems-architecture variable."
      },
      "parents": [],
      "source_refs": [
        "src-gemini-1-5"
      ]
    },
    {
      "id": "nvidia-blackwell",
      "name": "NVIDIA Blackwell & GB200 NVL72",
      "date": "2024-03-18",
      "vendor": "NVIDIA",
      "logo_domain": "nvidia.com",
      "archetype": "efficiency",
      "layers": [
        "hardware",
        "networking"
      ],
      "magnitude": "high",
      "magnitude_score": 88,
      "magnitude_rationale": "Made the rack, not the single GPU, the practical unit of frontier inference and training economics.\n",
      "confidence": "high",
      "concise_description": "Fifth-generation NVLink and a 72-GPU rack exposing 130 TB/s of low-latency GPU communication, with claims of 30x real-time trillion-parameter inference vs the H100 era.\n",
      "assumption_changed": "The rack becomes the accelerator.",
      "impact": [
        {
          "axis": "economics",
          "label": "Real-time LLM inference",
          "before": "1x (H100)",
          "after": "30x",
          "delta": "30x",
          "direction": "up",
          "headline": true,
          "source_ref": "src-nvidia-blackwell"
        },
        {
          "axis": "economics",
          "label": "Cost & energy per inference",
          "before": "1x",
          "after": "0.04x",
          "delta": "25x lower",
          "direction": "down",
          "source_ref": "src-nvidia-blackwell"
        },
        {
          "axis": "systems",
          "label": "Intra-rack GPU bandwidth",
          "after": "130 TB/s",
          "direction": "up",
          "source_ref": "src-nvidia-blackwell"
        }
      ],
      "downstream": {
        "short": "Hyperscaler capex pivots toward rack-scale systems.",
        "medium": "Rack-scale \"AI factory\" design becomes standard.",
        "long": "Interconnect bandwidth becomes a first-class competitive moat."
      },
      "parents": [],
      "source_refs": [
        "src-nvidia-blackwell"
      ]
    },
    {
      "id": "deepseek-v2",
      "name": "DeepSeek-V2",
      "date": "2024-05-06",
      "vendor": "DeepSeek",
      "logo_domain": "deepseek.com",
      "archetype": "efficiency",
      "layers": [
        "software"
      ],
      "magnitude": "high",
      "magnitude_score": 84,
      "magnitude_rationale": "First clear open-source proof that memory efficiency and sparse activation move the economics frontier, not just the benchmark frontier.\n",
      "confidence": "high",
      "concise_description": "A 236B-total / 21B-active MoE with Multi-head Latent Attention and DeepSeekMoE, reporting dramatic KV-cache and training-cost reductions.\n",
      "assumption_changed": "KV-cache compression changes the economics of serving.",
      "impact": [
        {
          "axis": "economics",
          "label": "KV cache footprint",
          "before": "100%",
          "after": "6.7%",
          "delta": "-93.3%",
          "direction": "down",
          "headline": true,
          "source_ref": "src-deepseek-v2"
        },
        {
          "axis": "economics",
          "label": "Training cost vs DeepSeek 67B",
          "before": "100%",
          "after": "57.5%",
          "delta": "-42.5%",
          "direction": "down",
          "source_ref": "src-deepseek-v2"
        },
        {
          "axis": "systems",
          "label": "Max generation throughput",
          "before": "1x",
          "after": "5.76x",
          "direction": "up",
          "source_ref": "src-deepseek-v2"
        }
      ],
      "downstream": {
        "short": "Cheaper open serving on commodity GPU fleets.",
        "medium": "KV compression and MLA become design references.",
        "long": "Architecture shifts toward memory-aware inference efficiency."
      },
      "parents": [],
      "source_refs": [
        "src-deepseek-v2"
      ]
    },
    {
      "id": "gpt-4o",
      "name": "GPT-4o",
      "date": "2024-05-13",
      "vendor": "OpenAI",
      "logo_domain": "openai.com",
      "archetype": "context",
      "layers": [
        "software"
      ],
      "magnitude": "high",
      "magnitude_score": 82,
      "magnitude_rationale": "Reset the user expectation for latency and made real-time multimodal interaction feel native rather than stitched together.\n",
      "confidence": "high",
      "concise_description": "An end-to-end omni model for text, image, and audio with sub-second audio response latency and a 50% cheaper API than GPT-4 Turbo.\n",
      "assumption_changed": "Latency drops into conversation range.",
      "impact": [
        {
          "axis": "capability",
          "label": "Audio response latency",
          "before": "~2800 ms",
          "after": "320 ms",
          "delta": "~9x faster",
          "direction": "down",
          "headline": true,
          "source_ref": "src-gpt-4o"
        },
        {
          "axis": "economics",
          "label": "API price vs GPT-4 Turbo",
          "before": "100%",
          "after": "50%",
          "delta": "-50%",
          "direction": "down",
          "source_ref": "src-gpt-4o"
        },
        {
          "axis": "capability",
          "label": "Modalities in one model",
          "after": "text + image + audio",
          "direction": "up",
          "source_ref": "src-gpt-4o"
        }
      ],
      "downstream": {
        "short": "Voice agents and live multimodal UX become credible.",
        "medium": "Separate modality stacks look increasingly obsolete.",
        "long": "Real-time multimodal inference becomes a baseline expectation."
      },
      "parents": [],
      "source_refs": [
        "src-gpt-4o"
      ]
    },
    {
      "id": "llama-3-1-405b",
      "name": "Llama 3.1 405B",
      "date": "2024-07-23",
      "vendor": "Meta",
      "logo_domain": "meta.com",
      "archetype": "open_frontier",
      "layers": [
        "software"
      ],
      "magnitude": "high",
      "magnitude_score": 80,
      "magnitude_rationale": "Narrowed the frontier/open gap enough to change enterprise procurement and experimentation behavior.\n",
      "confidence": "high",
      "concise_description": "The \"first frontier-level open source AI model,\" with 128K context, training on over 15T tokens across more than 16,000 H100 GPUs.\n",
      "assumption_changed": "Open weights reach the frontier.",
      "impact": [
        {
          "axis": "capability",
          "label": "Open-weight frontier parity",
          "after": "first frontier-level open model",
          "direction": "up",
          "headline": true,
          "source_ref": "src-llama-3-1"
        },
        {
          "axis": "systems",
          "label": "Context window",
          "before": "8K (Llama 2)",
          "after": "128K",
          "delta": "16x",
          "direction": "up",
          "source_ref": "src-llama-3-1"
        },
        {
          "axis": "economics",
          "label": "Training scale",
          "after": ">15T tokens / 16k H100",
          "direction": "up",
          "source_ref": "src-llama-3-1"
        }
      ],
      "downstream": {
        "short": "Open-weight pilots expand inside enterprises.",
        "medium": "Synthetic-data generation and distillation accelerate.",
        "long": "Open weights place structural pricing pressure on closed APIs."
      },
      "parents": [],
      "source_refs": [
        "src-llama-3-1"
      ]
    },
    {
      "id": "openai-o1",
      "name": "OpenAI o1",
      "date": "2024-09-12",
      "vendor": "OpenAI",
      "logo_domain": "openai.com",
      "archetype": "reasoning",
      "layers": [
        "software"
      ],
      "magnitude": "high",
      "magnitude_score": 88,
      "magnitude_rationale": "Made inference-time scaling a roadmap category in its own right.\n",
      "confidence": "high",
      "concise_description": "A reasoning series designed to \"spend more time thinking,\" with smooth gains from both train-time and test-time compute.\n",
      "assumption_changed": "Reasoning gets its own compute budget.",
      "impact": [
        {
          "axis": "capability",
          "label": "AIME 2024 (pass@1)",
          "before": "13.4% (GPT-4o)",
          "after": "74.4%",
          "delta": "+61 pts",
          "direction": "up",
          "headline": true,
          "source_ref": "src-openai-o1"
        },
        {
          "axis": "capability",
          "label": "GPQA Diamond (pass@1)",
          "after": "77.3%",
          "direction": "up",
          "source_ref": "src-openai-o1"
        },
        {
          "axis": "systems",
          "label": "New scaling axis",
          "after": "test-time compute",
          "direction": "up",
          "source_ref": "src-openai-o1"
        }
      ],
      "downstream": {
        "short": "Separate \"reasoner\" tiers appear in model menus.",
        "medium": "Thought-budget controls and model routing proliferate.",
        "long": "Inference compute allocation rivals pretraining scale."
      },
      "parents": [],
      "source_refs": [
        "src-openai-o1"
      ]
    },
    {
      "id": "mcp",
      "name": "Model Context Protocol",
      "date": "2024-11-25",
      "vendor": "Anthropic",
      "logo_domain": "anthropic.com",
      "archetype": "interoperability",
      "layers": [
        "networking",
        "software"
      ],
      "magnitude": "medium",
      "magnitude_score": 65,
      "magnitude_rationale": "Smaller immediate effect than a frontier model launch, but it changed how developers think about integration debt in agent systems.\n",
      "confidence": "high",
      "concise_description": "An open standard for secure, two-way connections between AI tools and data sources, with a spec/SDK and reference servers.\n",
      "assumption_changed": "Model-to-tool connectivity standardizes.",
      "impact": [
        {
          "axis": "systems",
          "label": "Model-to-tool integration",
          "before": "bespoke per-tool connectors",
          "after": "one open protocol",
          "direction": "up",
          "headline": true,
          "source_ref": "src-mcp"
        },
        {
          "axis": "capability",
          "label": "Reference servers at launch",
          "after": "Drive, Slack, GitHub, Git, Postgres, Puppeteer",
          "direction": "up",
          "source_ref": "src-mcp"
        }
      ],
      "downstream": {
        "short": "Bespoke connector duplication starts to look wasteful.",
        "medium": "MCP server ecosystems expand across vendors.",
        "long": "Tools and data become modular \"ports\" for any model."
      },
      "parents": [],
      "source_refs": [
        "src-mcp"
      ]
    },
    {
      "id": "deepseek-v3",
      "name": "DeepSeek-V3",
      "date": "2024-12-26",
      "vendor": "DeepSeek",
      "logo_domain": "deepseek.com",
      "archetype": "efficiency",
      "layers": [
        "software",
        "hardware",
        "networking"
      ],
      "magnitude": "high",
      "magnitude_score": 90,
      "magnitude_rationale": "Made algorithm-framework-hardware co-design legible as an open competitive strategy, not just a hyperscaler secret.\n",
      "confidence": "high",
      "concise_description": "671B total / 37B active with FP8 mixed-precision training, multi-token prediction, and cross-node MoE communication overlap.\n",
      "assumption_changed": "Open frontier quality at a visible GPU-hour cost.",
      "impact": [
        {
          "axis": "economics",
          "label": "Full training compute",
          "after": "2.788M H800 GPU-hours",
          "direction": "down",
          "headline": true,
          "source_ref": "src-deepseek-v3"
        },
        {
          "axis": "economics",
          "label": "Decoding throughput (MTP)",
          "before": "1x",
          "after": "1.8x",
          "direction": "up",
          "source_ref": "src-deepseek-v3"
        },
        {
          "axis": "systems",
          "label": "Training precision",
          "before": "BF16",
          "after": "FP8 mixed",
          "direction": "down",
          "source_ref": "src-deepseek-v3"
        }
      ],
      "downstream": {
        "short": "API price pressure rises across the market.",
        "medium": "Open infra stacks copy FP8, MTP, and comm overlap.",
        "long": "\"Model quality per GPU-hour\" becomes a core frontier KPI."
      },
      "parents": [
        "deepseek-v2"
      ],
      "source_refs": [
        "src-deepseek-v3"
      ]
    },
    {
      "id": "deepseek-r1",
      "name": "DeepSeek-R1",
      "date": "2025-01-20",
      "vendor": "DeepSeek",
      "logo_domain": "deepseek.com",
      "archetype": "reasoning",
      "layers": [
        "software"
      ],
      "magnitude": "high",
      "magnitude_score": 95,
      "magnitude_rationale": "The canonical \"DeepSeek moment\": an open reasoning model that materially changed market expectations for price, openness, and distillability.\n",
      "confidence": "high",
      "concise_description": "R1 and R1-Zero showed large-scale RL could induce reasoning, with a cold-start multi-stage pipeline and six distilled smaller models.\n",
      "assumption_changed": "Open reasoning reaches parity and becomes distillable.",
      "impact": [
        {
          "axis": "capability",
          "label": "AIME 2024 (pass@1)",
          "after": "79.8%",
          "direction": "up",
          "headline": true,
          "source_ref": "src-deepseek-r1"
        },
        {
          "axis": "economics",
          "label": "Reasoning openness",
          "before": "closed (o1)",
          "after": "open weights + 6 distilled",
          "direction": "up",
          "source_ref": "src-deepseek-r1"
        },
        {
          "axis": "capability",
          "label": "MATH-500",
          "after": "97.3%",
          "direction": "up",
          "source_ref": "src-deepseek-r1"
        }
      ],
      "downstream": {
        "short": "A reasoning price war begins.",
        "medium": "Dense distilled reasoners proliferate.",
        "long": "Open reasoning becomes a research baseline and commodity."
      },
      "parents": [
        "deepseek-v3"
      ],
      "source_refs": [
        "src-deepseek-r1"
      ]
    },
    {
      "id": "claude-3-7-sonnet",
      "name": "Claude 3.7 Sonnet",
      "date": "2025-02-24",
      "vendor": "Anthropic",
      "logo_domain": "anthropic.com",
      "archetype": "reasoning",
      "layers": [
        "software"
      ],
      "magnitude": "medium",
      "magnitude_score": 70,
      "magnitude_rationale": "Did not originate the reasoning trend, but made \"fast mode vs deep-think mode\" a practical product interaction pattern.\n",
      "confidence": "high",
      "concise_description": "The \"first hybrid reasoning model,\" with a visible extended-thinking mode, API control over thinking budget, and Claude Code.\n",
      "assumption_changed": "Thinking budget becomes product UI.",
      "impact": [
        {
          "axis": "capability",
          "label": "Thinking mode",
          "before": "implicit",
          "after": "user-controlled budget to 128K",
          "direction": "up",
          "headline": true,
          "source_ref": "src-claude-3-7"
        },
        {
          "axis": "systems",
          "label": "Agentic coding",
          "after": "Claude Code (terminal)",
          "direction": "up",
          "source_ref": "src-claude-3-7"
        }
      ],
      "downstream": {
        "short": "Product teams expose thought-budget controls.",
        "medium": "Agentic coding workflows mature.",
        "long": "Unified fast+slow models replace fragmented model menus."
      },
      "parents": [],
      "source_refs": [
        "src-claude-3-7"
      ]
    },
    {
      "id": "a2a",
      "name": "Agent2Agent Protocol",
      "date": "2025-04-09",
      "vendor": "Google",
      "logo_domain": "google.com",
      "archetype": "interoperability",
      "layers": [
        "networking",
        "software"
      ],
      "magnitude": "medium",
      "magnitude_score": 66,
      "magnitude_rationale": "Early, but strategically important: it treats a multi-agent estate as a network problem rather than a single-model problem.\n",
      "confidence": "medium",
      "concise_description": "A protocol with 50+ launch partners using HTTP, SSE, and JSON-RPC, positioned as complementary to MCP for multi-agent tasks.\n",
      "assumption_changed": "Agents become network peers.",
      "impact": [
        {
          "axis": "systems",
          "label": "Cross-agent interoperability",
          "before": "single-vendor orchestration",
          "after": "vendor-neutral protocol",
          "direction": "up",
          "headline": true,
          "source_ref": "src-a2a"
        },
        {
          "axis": "capability",
          "label": "Launch partners",
          "after": "50+",
          "direction": "up",
          "source_ref": "src-a2a"
        }
      ],
      "downstream": {
        "short": "Enterprise pilots for cross-agent workflows rise.",
        "medium": "Vendor-neutral orchestration becomes easier.",
        "long": "Agent meshes become an architecture category."
      },
      "parents": [],
      "source_refs": [
        "src-a2a"
      ]
    },
    {
      "id": "nvlink-fusion",
      "name": "NVLink Fusion",
      "date": "2025-05-18",
      "vendor": "NVIDIA",
      "logo_domain": "nvidia.com",
      "archetype": "interoperability",
      "layers": [
        "hardware",
        "networking"
      ],
      "magnitude": "medium",
      "magnitude_score": 64,
      "magnitude_rationale": "Significance is architectural and ecosystem-level; real deployment effects lag the announcement.\n",
      "confidence": "medium",
      "concise_description": "Opened the NVLink ecosystem to semi-custom AI infrastructure, letting custom CPUs and ASICs pair with NVIDIA rack-scale systems.\n",
      "assumption_changed": "Custom silicon plugs into the NVIDIA fabric.",
      "impact": [
        {
          "axis": "systems",
          "label": "Custom silicon in NVIDIA fabric",
          "before": "NVIDIA CPUs/GPUs only",
          "after": "custom CPU/ASIC pairing",
          "direction": "up",
          "headline": true,
          "source_ref": "src-nvlink-fusion"
        },
        {
          "axis": "economics",
          "label": "Scale-out networking",
          "after": "800 Gb/s",
          "direction": "up",
          "source_ref": "src-nvlink-fusion"
        }
      ],
      "downstream": {
        "short": "Sovereign and custom AI stack designs become plausible.",
        "medium": "CPU/ASIC heterogeneity grows inside NVIDIA fabrics.",
        "long": "The fabric becomes the platform, not only the GPU."
      },
      "parents": [
        "nvidia-blackwell"
      ],
      "source_refs": [
        "src-nvlink-fusion"
      ]
    },
    {
      "id": "nvidia-rubin",
      "name": "NVIDIA Rubin & Vera Rubin NVL72",
      "date": "2026-01-05",
      "vendor": "NVIDIA",
      "logo_domain": "nvidia.com",
      "archetype": "efficiency",
      "layers": [
        "hardware",
        "networking"
      ],
      "magnitude": "medium",
      "magnitude_score": 72,
      "magnitude_rationale": "Potentially enormous, but still early in the deployment cycle; partner availability is in the second half of 2026.\n",
      "confidence": "medium",
      "concise_description": "72 Rubin GPUs and 36 Vera CPUs with 260 TB/s rack bandwidth, 20.7 TB GPU memory, and claims of 10x lower inference token cost than Blackwell.\n",
      "assumption_changed": "Context memory and token economics dominate infra.",
      "impact": [
        {
          "axis": "economics",
          "label": "Inference token cost vs Blackwell",
          "before": "1x",
          "after": "0.1x",
          "delta": "10x lower",
          "direction": "down",
          "headline": true,
          "source_ref": "src-nvidia-rubin"
        },
        {
          "axis": "economics",
          "label": "GPUs to train MoE",
          "before": "1x",
          "after": "0.25x",
          "delta": "4x fewer",
          "direction": "down",
          "source_ref": "src-nvidia-rubin"
        },
        {
          "axis": "systems",
          "label": "Rack bandwidth",
          "before": "130 TB/s",
          "after": "260 TB/s",
          "direction": "up",
          "source_ref": "src-nvidia-rubin"
        }
      ],
      "downstream": {
        "short": "Roadmap resets for hyperscale and neocloud buyers.",
        "medium": "Long-context and agentic inference infra is redesigned.",
        "long": "Context-memory and storage fabrics join the model stack."
      },
      "parents": [
        "nvlink-fusion"
      ],
      "source_refs": [
        "src-nvidia-rubin"
      ]
    },
    {
      "id": "deepseek-v4",
      "name": "DeepSeek-V4 Preview",
      "date": "2026-04-24",
      "vendor": "DeepSeek",
      "logo_domain": "deepseek.com",
      "archetype": "context",
      "layers": [
        "software",
        "hardware"
      ],
      "magnitude": "high",
      "magnitude_score": 86,
      "magnitude_rationale": "Pushes million-token context, open weights, and agent orientation into one package and makes them feel default rather than exotic.\n",
      "confidence": "medium",
      "concise_description": "V4-Pro and V4-Flash with a 1M-token standard context (1.6T/49B active for Pro) at a fraction of the inference FLOPs and KV cache of V3.2.\n",
      "assumption_changed": "1M context becomes default and open.",
      "impact": [
        {
          "axis": "capability",
          "label": "Standard context window",
          "before": "128K (V3)",
          "after": "1M",
          "delta": "8x",
          "direction": "up",
          "headline": true,
          "source_ref": "src-deepseek-v4"
        },
        {
          "axis": "economics",
          "label": "Single-token inference FLOPs vs V3.2",
          "before": "100%",
          "after": "27%",
          "delta": "-73%",
          "direction": "down",
          "source_ref": "src-deepseek-v4"
        },
        {
          "axis": "economics",
          "label": "KV cache vs V3.2 at 1M context",
          "before": "100%",
          "after": "10%",
          "delta": "-90%",
          "direction": "down",
          "source_ref": "src-deepseek-v4"
        }
      ],
      "downstream": {
        "short": "Long-context agent apps become practical.",
        "medium": "Hybrid full-context + retrieval replaces pure-RAG defaults.",
        "long": "1M context becomes a mainstream premium/open tier."
      },
      "parents": [
        "deepseek-v3"
      ],
      "source_refs": [
        "src-deepseek-v4"
      ]
    },
    {
      "id": "claude-opus-4-8",
      "name": "Claude Opus 4.8",
      "date": "2026-05-28",
      "vendor": "Anthropic",
      "logo_domain": "anthropic.com",
      "archetype": "reasoning",
      "layers": [
        "software"
      ],
      "magnitude": "medium",
      "magnitude_score": 66,
      "magnitude_rationale": "Anthropic frames it as a modest improvement; the genuine shift is agentic autonomy — long-running, low-supervision multi-agent execution — plus cheaper fast-mode serving.\n",
      "confidence": "high",
      "concise_description": "A modest-but-tangible Opus upgrade focused on reliability and agentic judgment, launched with effort control and a Claude Code \"dynamic workflows\" mode that runs hundreds of parallel subagents for codebase-scale migrations.\n",
      "assumption_changed": "Frontier models run long, low-supervision agentic work reliably.",
      "impact": [
        {
          "axis": "capability",
          "label": "Unflagged code flaws vs Opus 4.7",
          "before": "1x",
          "after": "0.25x",
          "delta": "4x fewer",
          "direction": "down",
          "headline": true,
          "source_ref": "src-claude-opus-4-8"
        },
        {
          "axis": "economics",
          "label": "Fast-mode price vs prior models",
          "before": "1x",
          "after": "0.33x",
          "delta": "3x cheaper",
          "direction": "down",
          "source_ref": "src-claude-opus-4-8"
        },
        {
          "axis": "systems",
          "label": "Agentic scale (Claude Code)",
          "after": "100s of parallel subagents",
          "direction": "up",
          "source_ref": "src-claude-opus-4-8"
        }
      ],
      "downstream": {
        "short": "Effort control and parallel-subagent orchestration spread in agentic coding.",
        "medium": "Long-running, low-supervision agent workflows move from demo to default.",
        "long": "Frontier competition shifts from chatbot quality to autonomous-agent reliability."
      },
      "parents": [
        "claude-3-7-sonnet"
      ],
      "source_refs": [
        "src-claude-opus-4-8"
      ]
    }
  ],
  "links": [
    {
      "source": "openai-o1",
      "target": "deepseek-r1",
      "kind": "influence",
      "label": "popularizes test-time scaling"
    },
    {
      "source": "llama-3-1-405b",
      "target": "deepseek-r1",
      "kind": "influence",
      "label": "open-weight frontier precedent"
    },
    {
      "source": "deepseek-r1",
      "target": "claude-3-7-sonnet",
      "kind": "influence",
      "label": "open reasoning pressures hybrid productization"
    },
    {
      "source": "mcp",
      "target": "a2a",
      "kind": "influence",
      "label": "complements (tool vs agent interop)"
    },
    {
      "source": "nvidia-blackwell",
      "target": "deepseek-v3",
      "kind": "influence",
      "label": "rack-scale economics enable FP8 co-design"
    },
    {
      "source": "gemini-1-5",
      "target": "deepseek-v4",
      "kind": "influence",
      "label": "long-context design becomes mainstream"
    },
    {
      "source": "nvidia-rubin",
      "target": "deepseek-v4",
      "kind": "influence",
      "label": "context-centric infra enables 1M serving"
    },
    {
      "source": "deepseek-v2",
      "target": "gpt-4o",
      "kind": "influence",
      "label": "memory-efficiency pressure on serving cost"
    }
  ],
  "edges": [
    {
      "source": "deepseek-v2",
      "target": "deepseek-v3",
      "kind": "lineage",
      "label": "lineage"
    },
    {
      "source": "deepseek-v3",
      "target": "deepseek-r1",
      "kind": "lineage",
      "label": "lineage"
    },
    {
      "source": "nvidia-blackwell",
      "target": "nvlink-fusion",
      "kind": "lineage",
      "label": "lineage"
    },
    {
      "source": "nvlink-fusion",
      "target": "nvidia-rubin",
      "kind": "lineage",
      "label": "lineage"
    },
    {
      "source": "deepseek-v3",
      "target": "deepseek-v4",
      "kind": "lineage",
      "label": "lineage"
    },
    {
      "source": "claude-3-7-sonnet",
      "target": "claude-opus-4-8",
      "kind": "lineage",
      "label": "lineage"
    },
    {
      "source": "openai-o1",
      "target": "deepseek-r1",
      "kind": "influence",
      "label": "popularizes test-time scaling"
    },
    {
      "source": "llama-3-1-405b",
      "target": "deepseek-r1",
      "kind": "influence",
      "label": "open-weight frontier precedent"
    },
    {
      "source": "deepseek-r1",
      "target": "claude-3-7-sonnet",
      "kind": "influence",
      "label": "open reasoning pressures hybrid productization"
    },
    {
      "source": "mcp",
      "target": "a2a",
      "kind": "influence",
      "label": "complements (tool vs agent interop)"
    },
    {
      "source": "nvidia-blackwell",
      "target": "deepseek-v3",
      "kind": "influence",
      "label": "rack-scale economics enable FP8 co-design"
    },
    {
      "source": "gemini-1-5",
      "target": "deepseek-v4",
      "kind": "influence",
      "label": "long-context design becomes mainstream"
    },
    {
      "source": "nvidia-rubin",
      "target": "deepseek-v4",
      "kind": "influence",
      "label": "context-centric infra enables 1M serving"
    },
    {
      "source": "deepseek-v2",
      "target": "gpt-4o",
      "kind": "influence",
      "label": "memory-efficiency pressure on serving cost"
    }
  ],
  "sources": [
    {
      "id": "src-gemini-1-5",
      "type": "announcement",
      "title": "Our next-generation model: Gemini 1.5",
      "url": "https://blog.google/technology/ai/google-gemini-next-generation-model-february-2024/",
      "retrieved_date": "2026-05-28",
      "public": true,
      "summary": "Google announcement plus technical report introducing the MoE architecture, 128K standard context, and 1M-token preview.\n"
    },
    {
      "id": "src-nvidia-blackwell",
      "type": "announcement",
      "title": "NVIDIA Blackwell Platform Arrives to Power a New Era of Computing",
      "url": "https://nvidianews.nvidia.com/news/nvidia-blackwell-platform-arrives-to-power-a-new-era-of-computing",
      "retrieved_date": "2026-05-28",
      "public": true,
      "summary": "Blackwell, fifth-generation NVLink, GB200 NVL72, and rack-scale inference and energy-efficiency claims.\n"
    },
    {
      "id": "src-deepseek-v2",
      "type": "report",
      "title": "DeepSeek-V2: A Strong, Economical, and Efficient MoE Language Model",
      "url": "https://github.com/deepseek-ai/DeepSeek-V2",
      "retrieved_date": "2026-05-28",
      "public": true,
      "summary": "DeepSeek-V2 repository and technical report covering MLA, DeepSeekMoE, KV-cache reduction, and throughput.\n"
    },
    {
      "id": "src-gpt-4o",
      "type": "announcement",
      "title": "Hello GPT-4o",
      "url": "https://openai.com/index/hello-gpt-4o/",
      "retrieved_date": "2026-05-28",
      "public": true,
      "summary": "OpenAI announcement and system card for the omni model, including audio latency and API pricing claims.\n"
    },
    {
      "id": "src-llama-3-1",
      "type": "announcement",
      "title": "Introducing Llama 3.1: Our most capable models to date",
      "url": "https://ai.meta.com/blog/meta-llama-3-1/",
      "retrieved_date": "2026-05-28",
      "public": true,
      "summary": "Meta announcement for Llama 3.1 405B, 128K context, training scale, and FP8 quantization.\n"
    },
    {
      "id": "src-openai-o1",
      "type": "announcement",
      "title": "Introducing OpenAI o1",
      "url": "https://openai.com/index/introducing-openai-o1-preview/",
      "retrieved_date": "2026-05-28",
      "public": true,
      "summary": "OpenAI o1 product and research pages describing test-time compute scaling and benchmark results.\n"
    },
    {
      "id": "src-mcp",
      "type": "announcement",
      "title": "Introducing the Model Context Protocol",
      "url": "https://www.anthropic.com/news/model-context-protocol",
      "retrieved_date": "2026-05-28",
      "public": true,
      "summary": "Anthropic announcement open-sourcing MCP, with spec/SDK and reference servers.\n"
    },
    {
      "id": "src-deepseek-v3",
      "type": "report",
      "title": "DeepSeek-V3 Technical Report",
      "url": "https://github.com/deepseek-ai/DeepSeek-V3",
      "retrieved_date": "2026-05-28",
      "public": true,
      "summary": "DeepSeek-V3 release and report covering FP8 training, multi-token prediction, and full training GPU-hours.\n"
    },
    {
      "id": "src-deepseek-r1",
      "type": "report",
      "title": "DeepSeek-R1: Incentivizing Reasoning Capability in LLMs via RL",
      "url": "https://github.com/deepseek-ai/DeepSeek-R1",
      "retrieved_date": "2026-05-28",
      "public": true,
      "summary": "DeepSeek-R1 release and paper, including R1-Zero, distilled models, and reasoning benchmark results.\n"
    },
    {
      "id": "src-claude-3-7",
      "type": "announcement",
      "title": "Claude 3.7 Sonnet and Claude Code",
      "url": "https://www.anthropic.com/news/claude-3-7-sonnet",
      "retrieved_date": "2026-05-28",
      "public": true,
      "summary": "Anthropic announcement of the first hybrid reasoning model with extended-thinking controls and Claude Code.\n"
    },
    {
      "id": "src-a2a",
      "type": "announcement",
      "title": "Announcing the Agent2Agent Protocol (A2A)",
      "url": "https://developers.googleblog.com/en/a2a-a-new-era-of-agent-interoperability/",
      "retrieved_date": "2026-05-28",
      "public": true,
      "summary": "Google Developers Blog announcement of A2A, partner count, transports, and positioning relative to MCP.\n"
    },
    {
      "id": "src-nvlink-fusion",
      "type": "announcement",
      "title": "NVIDIA NVLink Fusion for Semi-Custom AI Infrastructure",
      "url": "https://nvidianews.nvidia.com/news/nvidia-nvlink-fusion-semi-custom-ai-infrastructure-partner-ecosystem",
      "retrieved_date": "2026-05-28",
      "public": true,
      "summary": "NVIDIA press release opening NVLink to custom CPUs and ASICs with scale-out networking targets.\n"
    },
    {
      "id": "src-nvidia-rubin",
      "type": "announcement",
      "title": "NVIDIA Rubin and Vera Rubin NVL72",
      "url": "https://www.nvidia.com/en-us/data-center/vera-rubin/",
      "retrieved_date": "2026-05-28",
      "public": true,
      "summary": "NVIDIA Rubin platform announcement with rack bandwidth, GPU memory, and inference token-cost claims. Forward-dated; reported figures.\n"
    },
    {
      "id": "src-deepseek-v4",
      "type": "report",
      "title": "DeepSeek-V4 Preview model card",
      "url": "https://github.com/deepseek-ai/DeepSeek-V4",
      "retrieved_date": "2026-05-28",
      "public": true,
      "summary": "DeepSeek-V4 Pro/Flash preview with 1M-token context and inference FLOPs/KV-cache reductions vs V3.2. Forward-dated; reported figures.\n"
    },
    {
      "id": "src-claude-opus-4-8",
      "type": "announcement",
      "title": "Introducing Claude Opus 4.8",
      "url": "https://www.anthropic.com/news/claude-opus-4-8",
      "retrieved_date": "2026-05-29",
      "public": true,
      "summary": "Anthropic announcement of Claude Opus 4.8 with effort control, Claude Code dynamic workflows (hundreds of parallel subagents), reliability gains over Opus 4.7, and a ~3x cheaper fast mode.\n"
    }
  ]
}