Coverage for src/pullapprove/diff.py: 92%

87 statements  

« prev     ^ index     » next       coverage.py v7.8.2, created at 2026-03-16 10:09 -0500

1import re 

2from collections.abc import Generator, Iterator 

3 

4 

class DiffFile:
    """A file entry of a diff, identified by its old-side and new-side paths."""

    def __init__(self, *, old_path: str, new_path: str):
        # Both paths are stored exactly as given by the caller.
        self.old_path, self.new_path = old_path, new_path

    def __repr__(self) -> str:
        return f"<DiffFile old_path={self.old_path} new_path={self.new_path}>"

    def is_move(self) -> bool:
        """Return True when the old path and the new path differ."""
        before, after = self.old_path, self.new_path
        return before != after

15 

16 

17class DiffHunk: 

18 def __init__( 

19 self, 

20 *, 

21 old_line: int, 

22 old_length: int | None, 

23 new_line: int, 

24 new_length: int | None, 

25 ): 

26 self.old_line = old_line 

27 self.old_length = old_length 

28 self.new_line = new_line 

29 self.new_length = new_length 

30 

31 

32class DiffCode: 

33 def __init__( 

34 self, 

35 *, 

36 old_line_number: int | None, 

37 new_line_number: int | None, 

38 content: str, 

39 change_type: str, 

40 ): 

41 self.old_line_number = old_line_number 

42 self.new_line_number = new_line_number 

43 self.content = content 

44 self.change_type = change_type 

45 

46 def is_addition(self) -> bool: 

47 return self.change_type == "+" 

48 

49 def is_deletion(self) -> bool: 

50 return self.change_type == "-" 

51 

52 def is_context(self) -> bool: 

53 return self.change_type == "" 

54 

55 @property 

56 def line_number(self) -> int: 

57 """For backwards compatibility - returns the appropriate line number.""" 

58 if self.is_deletion(): 

59 return self.old_line_number or 0 

60 return self.new_line_number or 0 

61 

62 def __str__(self) -> str: 

63 return f"{self.line_number}: {self.change_type or ' '}{self.content}" 

64 

65 def __repr__(self) -> str: 

66 return f"<DiffCode change_type={self.change_type} old_line={self.old_line_number} new_line={self.new_line_number} content={self.content}>" 

67 

68 def raw(self) -> str: 

69 return f"{self.change_type or ' '}{self.content}" 

70 

71 

def parse_diff_file_line(line: str) -> DiffFile | None:
    """Parse a ``diff --git`` header line into a ``DiffFile``.

    Returns ``None`` when the line does not match the header pattern. Each
    path is taken without its one-character prefix (for example ``a/`` or
    ``b/``) and with surrounding whitespace stripped.
    """
    header = re.match(r"^diff --git \w/(.*) \w/(.*)", line)
    if header is None:
        return None

    old_side, new_side = header.groups()
    return DiffFile(old_path=old_side.strip(), new_path=new_side.strip())

81 

82 

def parse_diff_hunk_line(line: str) -> DiffHunk | None:
    """Parse a ``@@ -a,b +c,d @@`` hunk header line into a ``DiffHunk``.

    Returns ``None`` when the line does not match the header pattern. A
    length that is absent from the header is stored as ``None``.
    """
    header = re.match(r"^@@ -(\d+),?(\d+)? \+(\d+),?(\d+)? @@", line)
    if header is None:
        return None

    old_start, old_count, new_start, new_count = header.groups()
    return DiffHunk(
        old_line=int(old_start),
        old_length=int(old_count) if old_count else None,
        new_line=int(new_start),
        new_length=int(new_count) if new_count else None,
    )

94 

95 

def iterate_diff_parts(
    diff: Iterator[str] | str,
) -> Generator[DiffFile | DiffCode]:
    """Stream the files and code lines of a unified diff in order of appearance.

    Args:
        diff: The diff text, either as a single string (split with
            ``str.splitlines``) or as an iterable of lines, which is consumed
            as given.

    Yields:
        A ``DiffFile`` for every ``diff --git`` header, and a ``DiffCode`` for
        every added, removed or context line of the hunks that follow it.
        Text after the closing ``@@`` of a hunk header, when present, is also
        yielded as a context ``DiffCode``.
    """
    current_file, current_hunk = None, None

    # Line numbers the next old-side / new-side line of the hunk will receive.
    hunk_minus_line_number, hunk_plus_line_number = 0, 0

    diff_iterator = diff.splitlines() if isinstance(diff, str) else diff

    for raw in diff_iterator:
        if new_file := parse_diff_file_line(raw):
            current_file = new_file
            current_hunk = None
            yield new_file  # Yield the new file as we go
            continue

        if current_file is None:
            # Nothing is reported before the first file header.
            continue

        if new_hunk := parse_diff_hunk_line(raw):
            current_hunk = new_hunk
            hunk_minus_line_number = new_hunk.old_line
            hunk_plus_line_number = new_hunk.new_line

            # Git may append a section heading after the closing `@@`, e.g.
            #
            #   @@ -6,7 +6,7 @@ def some_function():
            #
            # The heading is taken from an earlier line of the file; it is
            # not one of the hunk's lines. It is still yielded as a context
            # entry so consumers scanning content keep seeing it, but it must
            # not advance the counters, otherwise every following line of the
            # hunk would be numbered one too high.
            #
            # Skip the opening `@@` and cut at the closing one only, so a
            # heading that itself contains `@@` is kept whole.
            _, _, heading = raw[2:].partition("@@")
            heading = heading.lstrip()
            if heading:
                yield DiffCode(
                    old_line_number=hunk_minus_line_number,
                    new_line_number=hunk_plus_line_number,
                    content=heading,
                    change_type="",
                )
            continue

        if current_hunk is None:
            # Header/meta lines between the file header and its first hunk.
            continue

        if raw.startswith("+"):
            yield DiffCode(
                old_line_number=None,
                new_line_number=hunk_plus_line_number,
                content=raw[1:],
                change_type="+",
            )
            hunk_plus_line_number += 1
        elif raw.startswith("-"):
            yield DiffCode(
                old_line_number=hunk_minus_line_number,
                new_line_number=None,
                content=raw[1:],
                change_type="-",
            )
            hunk_minus_line_number += 1
        elif raw.startswith(" "):
            # A context line exists on both sides, so both counters advance.
            yield DiffCode(
                old_line_number=hunk_minus_line_number,
                new_line_number=hunk_plus_line_number,
                content=raw[1:],
                change_type="",
            )
            hunk_plus_line_number += 1
            hunk_minus_line_number += 1
        # Any other line inside a hunk carries no +/-/space prefix and is
        # skipped without touching the counters.