Coverage for src/pullapprove/diff.py: 92%
87 statements
« prev ^ index » next coverage.py v7.8.2, created at 2026-03-16 10:09 -0500
« prev ^ index » next coverage.py v7.8.2, created at 2026-03-16 10:09 -0500
1import re
2from collections.abc import Generator, Iterator
class DiffFile:
    """One file entry of a diff, identified by its old and new paths."""

    def __init__(self, *, old_path: str, new_path: str):
        # Both paths are stored exactly as given; no normalisation happens here.
        self.old_path, self.new_path = old_path, new_path

    def is_move(self) -> bool:
        """Return True when the old and new paths are not equal."""
        paths_differ = self.old_path != self.new_path
        return paths_differ

    def __repr__(self) -> str:
        return f"<DiffFile old_path={self.old_path} new_path={self.new_path}>"
17class DiffHunk:
18 def __init__(
19 self,
20 *,
21 old_line: int,
22 old_length: int | None,
23 new_line: int,
24 new_length: int | None,
25 ):
26 self.old_line = old_line
27 self.old_length = old_length
28 self.new_line = new_line
29 self.new_length = new_length
32class DiffCode:
33 def __init__(
34 self,
35 *,
36 old_line_number: int | None,
37 new_line_number: int | None,
38 content: str,
39 change_type: str,
40 ):
41 self.old_line_number = old_line_number
42 self.new_line_number = new_line_number
43 self.content = content
44 self.change_type = change_type
46 def is_addition(self) -> bool:
47 return self.change_type == "+"
49 def is_deletion(self) -> bool:
50 return self.change_type == "-"
52 def is_context(self) -> bool:
53 return self.change_type == ""
55 @property
56 def line_number(self) -> int:
57 """For backwards compatibility - returns the appropriate line number."""
58 if self.is_deletion():
59 return self.old_line_number or 0
60 return self.new_line_number or 0
62 def __str__(self) -> str:
63 return f"{self.line_number}: {self.change_type or ' '}{self.content}"
65 def __repr__(self) -> str:
66 return f"<DiffCode change_type={self.change_type} old_line={self.old_line_number} new_line={self.new_line_number} content={self.content}>"
68 def raw(self) -> str:
69 return f"{self.change_type or ' '}{self.content}"
def parse_diff_file_line(line: str) -> DiffFile | None:
    """Parse a ``diff --git <x>/<old> <y>/<new>`` header line.

    Returns a ``DiffFile`` holding both captured paths (surrounding whitespace
    stripped), or ``None`` when the line does not match that header pattern.
    """
    header = re.match(r"^diff --git \w/(.*) \w/(.*)", line)
    if header is None:
        return None

    old_side, new_side = header.group(1), header.group(2)
    return DiffFile(old_path=old_side.strip(), new_path=new_side.strip())
def parse_diff_hunk_line(line: str) -> DiffHunk | None:
    """Parse a ``@@ -old[,len] +new[,len] @@`` hunk header line.

    Returns a ``DiffHunk`` with the start lines as ints and each length as an
    int, or ``None`` for a length the header omits. Returns ``None`` when the
    line does not start with a hunk header.
    """
    header = re.match(r"^@@ -(\d+),?(\d+)? \+(\d+),?(\d+)? @@", line)
    if header is None:
        return None

    old_start, old_count, new_start, new_count = header.groups()
    # The length groups are optional in the pattern, so they may be None.
    parsed_old_count = int(old_count) if old_count else None
    parsed_new_count = int(new_count) if new_count else None
    return DiffHunk(
        old_line=int(old_start),
        old_length=parsed_old_count,
        new_line=int(new_start),
        new_length=parsed_new_count,
    )
def iterate_diff_parts(
    diff: Iterator[str] | str,
) -> Generator[DiffFile | DiffCode]:
    """Stream the files and code lines found in a git unified diff.

    Args:
        diff: The diff text, either as a single string (split into lines with
            ``str.splitlines``) or as an iterable of individual lines.

    Yields:
        A ``DiffFile`` for every ``diff --git`` header, followed by one
        ``DiffCode`` for each added ("+"), removed ("-") or context (" ") line
        in that file's hunks. Line numbers start from the values in the hunk
        header and advance per side as lines are consumed.

        Lines before the first file header, metadata lines between a file
        header and its first hunk, and in-hunk lines with any other prefix are
        skipped.
    """
    current_file, current_hunk = None, None

    # Line numbers that the next old-side ("-") and new-side ("+") line of the
    # current hunk will receive.
    hunk_minus_line_number, hunk_plus_line_number = 0, 0

    diff_iterator = diff.splitlines() if isinstance(diff, str) else diff

    for raw in diff_iterator:
        if new_file := parse_diff_file_line(raw):
            # A new file begins, so any hunk state from the previous file is
            # stale and must not be applied to this file's metadata lines.
            current_file = new_file
            current_hunk = None
            yield new_file  # Yield the new file as we go
            continue

        if current_file is None:
            # Nothing can be attributed to a file yet.
            continue

        if new_hunk := parse_diff_hunk_line(raw):
            current_hunk = new_hunk
            hunk_minus_line_number = current_hunk.old_line
            hunk_plus_line_number = current_hunk.new_line
            # Git may append text after the closing `@@`, e.g.
            #
            #   @@ -6,7 +6,7 @@ binary, for any purpose, ...
            #
            # That text is the hunk's *section heading*: a copy of the nearest
            # preceding function/heading line, which sits BEFORE the hunk's
            # first line. It is not part of the hunk body, so it must not be
            # yielded as a context line and must not advance the counters;
            # doing so would number every following line one too high.
            continue

        if current_hunk is None:
            # Header/meta lines between the file header and its first hunk
            # (index, ---/+++ and similar).
            continue

        if raw.startswith("+"):
            yield DiffCode(
                old_line_number=None,
                new_line_number=hunk_plus_line_number,
                content=raw[1:],
                change_type="+",
            )
            hunk_plus_line_number += 1
        elif raw.startswith("-"):
            yield DiffCode(
                old_line_number=hunk_minus_line_number,
                new_line_number=None,
                content=raw[1:],
                change_type="-",
            )
            hunk_minus_line_number += 1
        elif raw.startswith(" "):
            # Context lines exist on both sides and advance both counters.
            yield DiffCode(
                old_line_number=hunk_minus_line_number,
                new_line_number=hunk_plus_line_number,
                content=raw[1:],
                change_type="",
            )
            hunk_plus_line_number += 1
            hunk_minus_line_number += 1
        # Any other in-hunk line (for example the
        # "\ No newline at end of file" marker) carries no code and is skipped.