aboutsummaryrefslogtreecommitdiffstats
path: root/app/Comment.hs
blob: 7c76561b51852b50d8a0170114ac8dd36d3b1677 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
module Comment
  ( Comment (..),
    Point (..),
    getComments,
    extractComments,
    CommentStyle (..),
    uncomment,
    comment,
  )
where

import Backend qualified
import Comment.Language qualified as L
import Control.Applicative (liftA2)
import Control.Exception (catch)
import Data.Binary (Binary)
import Data.ByteString qualified as B
import Data.ByteString.Lazy qualified as LB
import Data.List (find, sortBy)
import Data.List.NonEmpty qualified as N
import Data.Maybe (fromMaybe)
import Data.Ord (comparing)
import Data.Text qualified as T
import Data.Text.Encoding qualified as T
import Exception qualified as E
import Foreign.Marshal.Alloc (alloca, free)
import Foreign.Marshal.Array (peekArray)
import Foreign.Storable
import GHC.Generics (Generic)
import System.FilePath (takeExtension)
import TreeSitter qualified as S

data Comment = Comment
  { text :: T.Text,
    language :: L.Language,
    startByte :: Int,
    endByte :: Int,
    startPoint :: Point,
    endPoint :: Point,
    filePath :: FilePath
  }
  deriving (Eq, Show)

data Point = Point
  { row :: Int,
    column :: Int
  }
  deriving (Eq, Show, Generic, Binary)

getComments :: Backend.CommitHash -> FilePath -> IO [Comment]
getComments commitHash filePath =
  fmap mergeLineComments
    . extractComments filePath language
    . LB.toStrict
    =<< catch
      (Backend.readTextFileOfBS commitHash filePath)
      (\(_ :: E.CannotReadFile) -> pure "")
  where
    language = L.fromPath (takeExtension filePath)

    mergeLineComments :: [Comment] -> [Comment]
    mergeLineComments =
      map mergeGroup
        . chainsBy (\a b -> a.endPoint.row + 1 == b.startPoint.row)
        . sortBy (comparing (liftA2 (,) (.startByte) (.endByte)))

    mergeGroup :: N.NonEmpty Comment -> Comment
    mergeGroup css@(c N.:| cs) =
      c
        { text = T.unlines (map (.text) (c : cs)),
          startByte = first.startByte,
          endByte = last.endByte,
          startPoint = first.startPoint,
          endPoint = last.endPoint
        }
      where
        first = N.head css
        last = N.last css

    {- A version of `Data.List.groupBy` that uses the last added group-member for comparison with new candidates for the group. `Data.List.groupBy` uses the initial member for all subsequent comparisons. -}
    chainsBy :: (a -> a -> Bool) -> [a] -> [N.NonEmpty a]
    chainsBy p = reverse . map N.reverse . go []
      where
        go rs [] = rs
        go [] (x : xs) = go [N.singleton x] xs
        go (ass@((a N.:| as) : rs)) (x : xs)
          | p a x = go ((x N.:| a : as) : rs) xs
          | otherwise = go (N.singleton x : ass) xs

extractComments :: FilePath -> L.Language -> B.ByteString -> IO [Comment]
extractComments filePath language str' =
  alloca $ \nodesPtrPtr -> do
    alloca $ \numNodesPtr -> do
      B.useAsCString str' $ \str ->
        S.extract_comments
          (L.parser language)
          str
          nodesPtrPtr
          numNodesPtr
      numNodes <- peek numNodesPtr
      nodesPtr <- peek nodesPtrPtr
      nodes <- peekArray (fromIntegral numNodes) nodesPtr
      free nodesPtr
      pure $
        map
          ( \node ->
              let startByte = fromIntegral node.startByte
                  endByte = fromIntegral node.endByte
               in Comment
                    { startPoint =
                        Point
                          { row = fromIntegral node.startPoint.row + 1,
                            column = fromIntegral node.startPoint.column + 1
                          },
                      endPoint =
                        Point
                          { row = fromIntegral node.endPoint.row + 1,
                            column = fromIntegral node.endPoint.column + 1
                          },
                      text =
                        T.decodeUtf8
                          . B.take (endByte - startByte)
                          . B.drop startByte
                          $ str',
                      ..
                    }
          )
          nodes

data CommentStyle
  = LineStyle T.Text
  | BlockStyle T.Text T.Text
  deriving (Eq, Show, Generic, Binary)

comment :: CommentStyle -> T.Text -> T.Text
comment (LineStyle linePrefix) = T.unlines . map ((linePrefix <> " ") <>) . T.lines
comment (BlockStyle blockStart blockEnd) = (blockStart <>) . (<> blockEnd)

uncomment :: L.Language -> T.Text -> (CommentStyle, T.Text)
uncomment language rawText =
  maybe
    ( ( LineStyle (L.lineStart language),
        stripLineComments (L.lineStart language) text
      )
    )
    ( \(blockStart, blockEnd) ->
        ( BlockStyle blockStart blockEnd,
          stripBlockComment blockStart blockEnd text
        )
    )
    $ do
      (blockStarts, blockEnd) <- L.block language
      (,blockEnd) <$> find (`T.isPrefixOf` text) blockStarts
  where
    text = stripLines rawText
    stripLines = T.intercalate "\n" . map T.strip . T.lines

stripLineComments :: T.Text -> T.Text -> T.Text
stripLineComments lineStart text =
  onLines
    ( \line ->
        fromMaybe line . fmap T.stripStart $
          T.stripPrefix lineStart line
    )
    text
  where
    onLines f = T.intercalate "\n" . map f . T.lines

stripBlockComment :: T.Text -> T.Text -> T.Text -> T.Text
stripBlockComment blockStart blockEnd text =
  T.strip
    . (fromMaybe text . T.stripSuffix blockEnd)
    . (fromMaybe text . T.stripPrefix blockStart)
    $ text