From 2e1ebdee0380f3c507128109d90812ef5ede5756 Mon Sep 17 00:00:00 2001 From: Barrett Ruth Date: Sat, 7 Feb 2026 13:05:53 -0500 Subject: [PATCH] feat(highlight): add treesitter context padding from disk Problem: treesitter parses each diff hunk in isolation, so incomplete syntax constructs at hunk boundaries (e.g., a function definition with no body) produce ERROR nodes and drop captures. Solution: read N lines from the on-disk file before/after each hunk and prepend/append them as unmapped padding lines. The line_map guard in highlight_treesitter skips extmarks for unmapped lines, so padding provides syntax context without visual output. Controlled by highlights.context (default 25, 0 to disable). Also applies to the vim syntax fallback path via a leading_offset filter. --- README.md | 10 ++--- doc/diffs.nvim.txt | 26 ++++++++--- lua/diffs/highlight.lua | 86 +++++++++++++++++++++++++++++++----- lua/diffs/init.lua | 6 +++ lua/diffs/parser.lua | 29 +++++++++++++ spec/highlight_spec.lua | 96 +++++++++++++++++++++++++++++++++++++++++ spec/parser_spec.lua | 79 +++++++++++++++++++++++++++++++++ 7 files changed, 308 insertions(+), 24 deletions(-) diff --git a/README.md b/README.md index 2e04b2c..e334426 100644 --- a/README.md +++ b/README.md @@ -41,12 +41,10 @@ luarocks install diffs.nvim ## Known Limitations -- **Incomplete syntax context**: Treesitter parses each diff hunk in isolation - without surrounding code context. When a hunk shows lines added to an existing - block (e.g., adding a plugin inside `return { ... }`), the parser doesn't see - the `return` statement and may produce incorrect highlighting. This is - inherent to parsing code fragments—no diff tooling solves this without - significant complexity. +- **Incomplete syntax context**: Treesitter parses each diff hunk in isolation. + To improve accuracy, `diffs.nvim` reads lines from disk before and after each + hunk for parsing context (controlled by `highlights.context`, default 25). 
+ This resolves most boundary issues. Set `highlights.context = 0` to disable. - **Syntax flashing**: `diffs.nvim` hooks into the `FileType fugitive` event triggered by `vim-fugitive`, at which point the buffer is preliminarily diff --git a/doc/diffs.nvim.txt b/doc/diffs.nvim.txt index 9554059..22db825 100644 --- a/doc/diffs.nvim.txt +++ b/doc/diffs.nvim.txt @@ -56,6 +56,7 @@ Configuration is done via `vim.g.diffs`. Set this before the plugin loads: highlights = { background = true, gutter = true, + context = 25, treesitter = { enabled = true, max_lines = 500, @@ -113,6 +114,16 @@ Configuration is done via `vim.g.diffs`. Set this before the plugin loads: Highlight line numbers with matching colors. Only visible if line numbers are enabled. + {context} (integer, default: 25) + Number of lines to read from the source file + before and after each hunk for syntax parsing + context. Improves accuracy at hunk boundaries + where incomplete constructs (e.g., a function + definition with no body) would otherwise confuse + the parser. Set to 0 to disable. Lines are read + from disk with early exit — cost scales with the + hunk's line offset, not total file size. + {treesitter} (table, default: see below) Treesitter highlighting options. See |diffs.TreesitterConfig| for fields. @@ -305,14 +316,15 @@ KNOWN LIMITATIONS *diffs-limitations* Incomplete Syntax Context ~ *diffs-syntax-context* -Treesitter parses each diff hunk in isolation without surrounding code -context. When a hunk shows lines added to an existing block (e.g., adding a -plugin inside `return { ... }`), the parser doesn't see the `return` -statement and may produce incorrect or unusual highlighting. +Treesitter parses each diff hunk in isolation. To provide surrounding code +context, diffs.nvim reads lines from disk before and after each hunk +(controlled by `highlights.context`, default 25). 
This resolves most boundary +issues where incomplete constructs (e.g., a function definition at the edge +of a hunk with no body) would confuse the parser. -This is inherent to parsing code fragments. No diff tooling solves this -problem without significant complexity—the parser simply doesn't have enough -information to understand the full syntactic structure. +Set `highlights.context = 0` to disable context padding and restore the +previous behavior. In rare cases, context padding may not help if the +relevant surrounding code is very far from the hunk boundaries. Syntax Highlighting Flash ~ *diffs-flash* diff --git a/lua/diffs/highlight.lua b/lua/diffs/highlight.lua index 42c4868..5107893 100644 --- a/lua/diffs/highlight.lua +++ b/lua/diffs/highlight.lua @@ -3,6 +3,33 @@ local M = {} local dbg = require('diffs.log').dbg local diff = require('diffs.diff') +---@param filepath string +---@param from_line integer +---@param count integer +---@return string[] +local function read_line_range(filepath, from_line, count) + if count <= 0 then + return {} + end + local f = io.open(filepath, 'r') + if not f then + return {} + end + local result = {} + local line_num = 0 + for line in f:lines() do + line_num = line_num + 1 + if line_num >= from_line then + table.insert(result, line) + if #result >= count then + break + end + end + end + f:close() + return result +end + local PRIORITY_CLEAR = 198 local PRIORITY_SYNTAX = 199 local PRIORITY_LINE_BG = 200 @@ -177,8 +204,9 @@ end ---@param hunk diffs.Hunk ---@param code_lines string[] ---@param covered_lines? table +---@param leading_offset? 
integer ---@return integer -local function highlight_vim_syntax(bufnr, ns, hunk, code_lines, covered_lines) +local function highlight_vim_syntax(bufnr, ns, hunk, code_lines, covered_lines, leading_offset) local ft = hunk.ft if not ft then return 0 @@ -188,6 +216,8 @@ local function highlight_vim_syntax(bufnr, ns, hunk, code_lines, covered_lines) return 0 end + leading_offset = leading_offset or 0 + local scratch = vim.api.nvim_create_buf(false, true) vim.api.nvim_buf_set_lines(scratch, 0, -1, false, code_lines) vim.api.nvim_set_option_value('bufhidden', 'wipe', { buf = scratch }) @@ -214,17 +244,21 @@ local function highlight_vim_syntax(bufnr, ns, hunk, code_lines, covered_lines) vim.api.nvim_buf_delete(scratch, { force = true }) + local hunk_line_count = #hunk.lines local extmark_count = 0 for _, span in ipairs(spans) do - local buf_line = hunk.start_line + span.line - 1 - pcall(vim.api.nvim_buf_set_extmark, bufnr, ns, buf_line, span.col_start, { - end_col = span.col_end, - hl_group = span.hl_name, - priority = PRIORITY_SYNTAX, - }) - extmark_count = extmark_count + 1 - if covered_lines then - covered_lines[buf_line] = true + local adj = span.line - leading_offset + if adj >= 1 and adj <= hunk_line_count then + local buf_line = hunk.start_line + adj - 1 + pcall(vim.api.nvim_buf_set_extmark, bufnr, ns, buf_line, span.col_start, { + end_col = span.col_end, + hl_group = span.hl_name, + priority = PRIORITY_SYNTAX, + }) + extmark_count = extmark_count + 1 + if covered_lines then + covered_lines[buf_line] = true + end end end @@ -255,6 +289,20 @@ function M.highlight_hunk(bufnr, ns, hunk, opts) ---@type table local covered_lines = {} + local context = opts.highlights.context or 0 + local leading = {} + local trailing = {} + if (use_ts or use_vim) and context > 0 and hunk.file_new_start and hunk.repo_root then + local filepath = vim.fs.joinpath(hunk.repo_root, hunk.filename) + local lead_from = math.max(1, hunk.file_new_start - context) + local lead_count = 
hunk.file_new_start - lead_from + if lead_count > 0 then + leading = read_line_range(filepath, lead_from, lead_count) + end + local trail_from = hunk.file_new_start + (hunk.file_new_count or 0) + trailing = read_line_range(filepath, trail_from, context) + end + local extmark_count = 0 if use_ts then ---@type string[] @@ -266,6 +314,11 @@ function M.highlight_hunk(bufnr, ns, hunk, opts) ---@type table local old_map = {} + for _, pad_line in ipairs(leading) do + table.insert(new_code, pad_line) + table.insert(old_code, pad_line) + end + for i, line in ipairs(hunk.lines) do local prefix = line:sub(1, 1) local stripped = line:sub(2) @@ -284,6 +337,11 @@ function M.highlight_hunk(bufnr, ns, hunk, opts) end end + for _, pad_line in ipairs(trailing) do + table.insert(new_code, pad_line) + table.insert(old_code, pad_line) + end + extmark_count = highlight_treesitter(bufnr, ns, new_code, hunk.lang, new_map, 1, covered_lines) extmark_count = extmark_count + highlight_treesitter(bufnr, ns, old_code, hunk.lang, old_map, 1, covered_lines) @@ -305,10 +363,16 @@ function M.highlight_hunk(bufnr, ns, hunk, opts) elseif use_vim then ---@type string[] local code_lines = {} + for _, pad_line in ipairs(leading) do + table.insert(code_lines, pad_line) + end for _, line in ipairs(hunk.lines) do table.insert(code_lines, line:sub(2)) end - extmark_count = highlight_vim_syntax(bufnr, ns, hunk, code_lines, covered_lines) + for _, pad_line in ipairs(trailing) do + table.insert(code_lines, pad_line) + end + extmark_count = highlight_vim_syntax(bufnr, ns, hunk, code_lines, covered_lines, #leading) end if diff --git a/lua/diffs/init.lua b/lua/diffs/init.lua index a5dfcf0..8d8f670 100644 --- a/lua/diffs/init.lua +++ b/lua/diffs/init.lua @@ -14,6 +14,7 @@ ---@class diffs.Highlights ---@field background boolean ---@field gutter boolean +---@field context integer ---@field treesitter diffs.TreesitterConfig ---@field vim diffs.VimConfig ---@field intra diffs.IntraConfig @@ -80,6 +81,7 @@ local 
default_config = { highlights = { background = true, gutter = true, + context = 25, treesitter = { enabled = true, max_lines = 500, @@ -231,6 +233,7 @@ local function init() vim.validate({ ['highlights.background'] = { opts.highlights.background, 'boolean', true }, ['highlights.gutter'] = { opts.highlights.gutter, 'boolean', true }, + ['highlights.context'] = { opts.highlights.context, 'number', true }, ['highlights.treesitter'] = { opts.highlights.treesitter, 'table', true }, ['highlights.vim'] = { opts.highlights.vim, 'table', true }, ['highlights.intra'] = { opts.highlights.intra, 'table', true }, @@ -291,6 +294,9 @@ local function init() if opts.debounce_ms and opts.debounce_ms < 0 then error('diffs: debounce_ms must be >= 0') end + if opts.highlights and opts.highlights.context and opts.highlights.context < 0 then + error('diffs: highlights.context must be >= 0') + end if opts.highlights and opts.highlights.treesitter diff --git a/lua/diffs/parser.lua b/lua/diffs/parser.lua index 52b2864..43eb1f6 100644 --- a/lua/diffs/parser.lua +++ b/lua/diffs/parser.lua @@ -8,6 +8,11 @@ ---@field lines string[] ---@field header_start_line integer? ---@field header_lines string[]? +---@field file_old_start integer? +---@field file_old_count integer? +---@field file_new_start integer? +---@field file_new_count integer? +---@field repo_root string? local M = {} @@ -132,6 +137,14 @@ function M.parse_buffer(bufnr) local header_start = nil ---@type string[] local header_lines = {} + ---@type integer? + local file_old_start = nil + ---@type integer? + local file_old_count = nil + ---@type integer? + local file_new_start = nil + ---@type integer? 
+ local file_new_count = nil local function flush_hunk() if hunk_start and #hunk_lines > 0 then @@ -143,6 +156,11 @@ function M.parse_buffer(bufnr) header_context = hunk_header_context, header_context_col = hunk_header_context_col, lines = hunk_lines, + file_old_start = file_old_start, + file_old_count = file_old_count, + file_new_start = file_new_start, + file_new_count = file_new_count, + repo_root = repo_root, } if hunk_count == 1 and header_start and #header_lines > 0 then hunk.header_start_line = header_start @@ -154,6 +172,10 @@ function M.parse_buffer(bufnr) hunk_header_context = nil hunk_header_context_col = nil hunk_lines = {} + file_old_start = nil + file_old_count = nil + file_new_start = nil + file_new_count = nil end for i, line in ipairs(lines) do @@ -174,6 +196,13 @@ function M.parse_buffer(bufnr) elseif line:match('^@@.-@@') then flush_hunk() hunk_start = i + local hs, hc, hs2, hc2 = line:match('^@@ %-(%d+),?(%d*) %+(%d+),?(%d*) @@') + if hs then + file_old_start = tonumber(hs) + file_old_count = tonumber(hc) or 1 + file_new_start = tonumber(hs2) + file_new_count = tonumber(hc2) or 1 + end local prefix, context = line:match('^(@@.-@@%s*)(.*)') if context and context ~= '' then hunk_header_context = context diff --git a/spec/highlight_spec.lua b/spec/highlight_spec.lua index 1257523..a556228 100644 --- a/spec/highlight_spec.lua +++ b/spec/highlight_spec.lua @@ -37,6 +37,7 @@ describe('highlight', function() highlights = { background = false, gutter = false, + context = 0, treesitter = { enabled = true, max_lines = 500, @@ -1055,6 +1056,99 @@ describe('highlight', function() assert.is_true(min_line_bg < min_char_bg) delete_buffer(bufnr) end) + + it('context padding produces no extmarks on padding lines', function() + local repo_root = '/tmp/diffs-test-context' + vim.fn.mkdir(repo_root, 'p') + + local f = io.open(repo_root .. 
'/test.lua', 'w') + f:write('local M = {}\n') + f:write('function M.hello()\n') + f:write(' return "hi"\n') + f:write('end\n') + f:write('return M\n') + f:close() + + local bufnr = create_buffer({ + '@@ -3,1 +3,2 @@', + ' return "hi"', + '+"bye"', + }) + + local hunk = { + filename = 'test.lua', + lang = 'lua', + start_line = 1, + lines = { ' return "hi"', '+"bye"' }, + file_old_start = 3, + file_old_count = 1, + file_new_start = 3, + file_new_count = 2, + repo_root = repo_root, + } + + highlight.highlight_hunk(bufnr, ns, hunk, default_opts({ highlights = { context = 25 } })) + + local extmarks = get_extmarks(bufnr) + for _, mark in ipairs(extmarks) do + local row = mark[2] + assert.is_true(row >= 1 and row <= 2) + end + + delete_buffer(bufnr) + os.remove(repo_root .. '/test.lua') + vim.fn.delete(repo_root, 'rf') + end) + + it('context = 0 matches behavior without padding', function() + local bufnr = create_buffer({ + '@@ -1,1 +1,2 @@', + ' local x = 1', + '+local y = 2', + }) + + local hunk = { + filename = 'test.lua', + lang = 'lua', + start_line = 1, + lines = { ' local x = 1', '+local y = 2' }, + file_new_start = 1, + file_new_count = 2, + repo_root = '/nonexistent', + } + + highlight.highlight_hunk(bufnr, ns, hunk, default_opts({ highlights = { context = 0 } })) + + local extmarks = get_extmarks(bufnr) + assert.is_true(#extmarks > 0) + delete_buffer(bufnr) + end) + + it('gracefully handles missing file for context padding', function() + local bufnr = create_buffer({ + '@@ -1,1 +1,2 @@', + ' local x = 1', + '+local y = 2', + }) + + local hunk = { + filename = 'test.lua', + lang = 'lua', + start_line = 1, + lines = { ' local x = 1', '+local y = 2' }, + file_new_start = 1, + file_new_count = 2, + repo_root = '/nonexistent/path', + } + + assert.has_no.errors(function() + highlight.highlight_hunk(bufnr, ns, hunk, default_opts({ highlights = { context = 25 } })) + end) + + local extmarks = get_extmarks(bufnr) + assert.is_true(#extmarks > 0) + delete_buffer(bufnr) + 
end) end) describe('diff header highlighting', function() @@ -1086,6 +1180,7 @@ describe('highlight', function() highlights = { background = false, gutter = false, + context = 0, treesitter = { enabled = true, max_lines = 500 }, vim = { enabled = false, max_lines = 200 }, }, @@ -1242,6 +1337,7 @@ describe('highlight', function() highlights = { background = false, gutter = false, + context = 0, treesitter = { enabled = true, max_lines = 500 }, vim = { enabled = false, max_lines = 200 }, }, diff --git a/spec/parser_spec.lua b/spec/parser_spec.lua index 89d0ac8..11ac3be 100644 --- a/spec/parser_spec.lua +++ b/spec/parser_spec.lua @@ -421,5 +421,84 @@ describe('parser', function() os.remove(file_path) vim.fn.delete(repo_root, 'rf') end) + + it('extracts file line numbers from @@ header', function() + local bufnr = create_buffer({ + 'M lua/test.lua', + '@@ -1,3 +1,4 @@', + ' local M = {}', + '+local new = true', + ' return M', + }) + local hunks = parser.parse_buffer(bufnr) + + assert.are.equal(1, #hunks) + assert.are.equal(1, hunks[1].file_old_start) + assert.are.equal(3, hunks[1].file_old_count) + assert.are.equal(1, hunks[1].file_new_start) + assert.are.equal(4, hunks[1].file_new_count) + delete_buffer(bufnr) + end) + + it('extracts large line numbers from @@ header', function() + local bufnr = create_buffer({ + 'M lua/test.lua', + '@@ -100,20 +200,30 @@', + ' local M = {}', + }) + local hunks = parser.parse_buffer(bufnr) + + assert.are.equal(1, #hunks) + assert.are.equal(100, hunks[1].file_old_start) + assert.are.equal(20, hunks[1].file_old_count) + assert.are.equal(200, hunks[1].file_new_start) + assert.are.equal(30, hunks[1].file_new_count) + delete_buffer(bufnr) + end) + + it('defaults count to 1 when omitted in @@ header', function() + local bufnr = create_buffer({ + 'M lua/test.lua', + '@@ -1 +1 @@', + ' local M = {}', + }) + local hunks = parser.parse_buffer(bufnr) + + assert.are.equal(1, #hunks) + assert.are.equal(1, hunks[1].file_old_start) + 
assert.are.equal(1, hunks[1].file_old_count) + assert.are.equal(1, hunks[1].file_new_start) + assert.are.equal(1, hunks[1].file_new_count) + delete_buffer(bufnr) + end) + + it('stores repo_root on hunk when available', function() + local bufnr = create_buffer({ + 'M lua/test.lua', + '@@ -1,3 +1,4 @@', + ' local M = {}', + '+local new = true', + ' return M', + }) + vim.api.nvim_buf_set_var(bufnr, 'diffs_repo_root', '/tmp/test-repo') + local hunks = parser.parse_buffer(bufnr) + + assert.are.equal(1, #hunks) + assert.are.equal('/tmp/test-repo', hunks[1].repo_root) + delete_buffer(bufnr) + end) + + it('repo_root is nil when not available', function() + local bufnr = create_buffer({ + 'M lua/test.lua', + '@@ -1,3 +1,4 @@', + ' local M = {}', + }) + local hunks = parser.parse_buffer(bufnr) + + assert.are.equal(1, #hunks) + assert.is_nil(hunks[1].repo_root) + delete_buffer(bufnr) + end) end) end)