app/Comment.hs


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207

module Comment
  ( Comment (..),
    Point (..),
    getComments,
    extractComments,
    CommentStyle (..),
    uncomment,
    comment,
  )
where

import Comment.Language
import Control.Applicative (liftA2)
import Control.Exception (catch)
import Control.Monad
import Data.Binary (Binary)
import Data.ByteString qualified as B
import Data.ByteString.Lazy qualified as LB
import Data.List (find, sortBy)
import Data.List.NonEmpty qualified as N
import Data.Maybe (fromMaybe)
import Data.Ord (comparing)
import Data.Text qualified as T
import Data.Text.Encoding qualified as T
import Exception qualified as E
import Foreign.C.String
import Foreign.Marshal.Alloc (free, malloc)
import Foreign.Ptr (Ptr, nullPtr)
import Foreign.Storable
import GHC.Generics (Generic)
import GHC.Int (Int64)
import Git qualified
import System.FilePath (takeExtension)
import TreeSitter qualified as S

data Comment = Comment
  { text :: T.Text,
    language :: Language,
    startByte :: Int64,
    endByte :: Int64,
    startPoint :: Point,
    endPoint :: Point,
    filePath :: FilePath
  }
  deriving (Eq, Show)

data Point = Point
  { row :: Int,
    column :: Int
  }
  deriving (Eq, Show, Generic, Binary)

getComments :: Git.CommitHash -> FilePath -> IO [Comment]
getComments commitHash filePath =
  fmap mergeLineComments
    . (extractComments filePath language . LB.toStrict)
    =<< catch
      (Git.readTextFileOfBS commitHash filePath)
      (\(_ :: E.CannotReadFile) -> pure "")
  where
    language = fromExtension (takeExtension filePath)

    mergeLineComments :: [Comment] -> [Comment]
    mergeLineComments =
      map mergeGroup
        . chainsBy (\a b -> a.endPoint.row + 1 == b.startPoint.row)
        . sortBy (comparing (liftA2 (,) (.startByte) (.endByte)))

    mergeGroup :: N.NonEmpty Comment -> Comment
    mergeGroup css@(c N.:| cs) =
      c
        { text = T.unlines (map (.text) (c : cs)),
          startByte = first.startByte,
          endByte = last.endByte,
          startPoint = first.startPoint,
          endPoint = last.endPoint
        }
      where
        first = N.head css
        last = N.last css

    {- A version of `Data.List.groupBy` that uses the last added group-member for comparison with new candidates for the group. `Data.List.groupBy` uses the initial member for all subsequent comparisons. -}
    chainsBy :: (a -> a -> Bool) -> [a] -> [N.NonEmpty a]
    chainsBy p = reverse . map N.reverse . go []
      where
        go rs [] = rs
        go [] (x : xs) = go [N.singleton x] xs
        go (ass@((a N.:| as) : rs)) (x : xs)
          | p a x = go ((x N.:| a : as) : rs) xs
          | otherwise = go (N.singleton x : ass) xs

extractComments :: FilePath -> Language -> B.ByteString -> IO [Comment]
extractComments filePath language str' = do
  S.withParser (parser language) $ \parser -> do
    B.useAsCStringLen str' $ \(str, len) -> do
      tree <- S.ts_parser_parse_string parser nullPtr str (fromIntegral len)
      node <- malloc
      S.ts_tree_root_node tree node
      x <-
        mapM
          ( \n' -> do
              startByte <- fromIntegral <$> S.ts_node_start_byte n'
              endByte <- fromIntegral <$> S.ts_node_end_byte n'
              let text =
                    T.decodeUtf8
                      . B.take (fromIntegral endByte - fromIntegral startByte)
                      . B.drop (fromIntegral startByte)
                      $ str'

              startPoint <- do
                point <- malloc
                S.ts_node_start_point n' point
                S.Point {..} <- peek point
                free point
                pure
                  Point
                    { row = fromIntegral row,
                      column = fromIntegral column
                    }
              endPoint <- do
                point <- malloc
                S.ts_node_end_point n' point
                S.Point {..} <- peek point
                free point
                pure
                  Point
                    { row = fromIntegral row,
                      column = fromIntegral column
                    }

              pure Comment {..}
          )
          =<< (commentsFromNodeRec language node)
      free node
      pure x

commentsFromNodeRec :: Language -> Ptr S.Node -> IO [Ptr S.Node]
commentsFromNodeRec language =
  (filterM (isCommentNode language) =<<)
    . childNodesFromNodeRec

isCommentNode :: Language -> Ptr S.Node -> IO Bool
isCommentNode language n =
  fmap (`elem` (nodeTypes language)) . peekCString =<< S.ts_node_type n

childNodesFromNodeRec :: Ptr S.Node -> IO [Ptr S.Node]
childNodesFromNodeRec n = do
  ns' <- childNodesFromNode n
  ns <- concat <$> mapM childNodesFromNodeRec ns'
  pure $ n : ns

childNodesFromNode :: Ptr S.Node -> IO [Ptr S.Node]
childNodesFromNode n = do
  numChildren <- fromIntegral <$> S.ts_node_named_child_count n
  mapM
    ( \k -> do
        node <- malloc
        S.ts_node_named_child n k node
        pure node
    )
    [0 .. numChildren - 1]

data CommentStyle
  = LineStyle T.Text
  | BlockStyle T.Text T.Text
  deriving (Eq, Show, Generic, Binary)

comment :: CommentStyle -> T.Text -> T.Text
comment (LineStyle linePrefix) = T.unlines . map ((linePrefix <> " ") <>) . T.lines
comment (BlockStyle blockStart blockEnd) = (blockStart <>) . (<> blockEnd)

uncomment :: Language -> T.Text -> (CommentStyle, T.Text)
uncomment language rawText =
  maybe
    ( ( LineStyle (lineStart language),
        stripLineComments (lineStart language) text
      )
    )
    ( \(blockStart, blockEnd) ->
        ( BlockStyle blockStart blockEnd,
          stripBlockComment blockStart blockEnd text
        )
    )
    $ do
      (blockStarts, blockEnd) <- block language
      (,blockEnd) <$> find (`T.isPrefixOf` text) blockStarts
  where
    text = stripLines rawText
    stripLines = T.intercalate "\n" . map T.strip . T.lines

stripLineComments :: T.Text -> T.Text -> T.Text
stripLineComments lineStart text =
  onLines
    ( \line ->
        fromMaybe line . fmap T.stripStart $
          T.stripPrefix lineStart line
    )
    text
  where
    onLines f = T.intercalate "\n" . map f . T.lines

stripBlockComment :: T.Text -> T.Text -> T.Text -> T.Text
stripBlockComment blockStart blockEnd text =
  T.strip
    . (fromMaybe text . T.stripSuffix blockEnd)
    . (fromMaybe text . T.stripPrefix blockStart)
    $ text