From c8ab97e77c8ab56b9835d9f260dc222a10e9b3c6 Mon Sep 17 00:00:00 2001 From: Alexander Foremny Date: Mon, 18 Dec 2023 07:38:22 +0100 Subject: feat: add support for c, elm, nix, shell --- app/Comment.hs | 28 +++-- app/Comment/Language.hs | 51 ++++++--- app/Exception.hs | 18 ++- app/History.hs | 9 +- app/TreeSitter.hs | 297 ++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 372 insertions(+), 31 deletions(-) (limited to 'app') diff --git a/app/Comment.hs b/app/Comment.hs index 2769c83..123acec 100644 --- a/app/Comment.hs +++ b/app/Comment.hs @@ -9,7 +9,7 @@ module Comment ) where -import Comment.Language +import Comment.Language qualified as L import Control.Applicative (liftA2) import Control.Exception (catch) import Data.Binary (Binary) @@ -32,7 +32,7 @@ import TreeSitter qualified as S data Comment = Comment { text :: T.Text, - language :: Language, + language :: L.Language, startByte :: Int, endByte :: Int, startPoint :: Point, @@ -50,12 +50,20 @@ data Point = Point getComments :: Git.CommitHash -> FilePath -> IO [Comment] getComments commitHash filePath = fmap mergeLineComments - . (extractComments filePath language . LB.toStrict) + . ( extractComments + filePath + ( -- TODO Support amiguous file languages + -- + -- @backlog + N.head language + ) + . LB.toStrict + ) =<< catch (Git.readTextFileOfBS commitHash filePath) (\(_ :: E.CannotReadFile) -> pure "") where - language = fromExtension (takeExtension filePath) + language = L.fromPath (takeExtension filePath) mergeLineComments :: [Comment] -> [Comment] mergeLineComments = @@ -86,13 +94,13 @@ getComments commitHash filePath = | p a x = go ((x N.:| a : as) : rs) xs | otherwise = go (N.singleton x : ass) xs -extractComments :: FilePath -> Language -> B.ByteString -> IO [Comment] +extractComments :: FilePath -> L.Language -> B.ByteString -> IO [Comment] extractComments filePath language str' = alloca $ \nodesPtrPtr -> do alloca $ \numNodesPtr -> do B.useAsCString str' $ \str -> S.extract_comments - (parser language) + (L.parser language) str nodesPtrPtr numNodesPtr @@ -135,11 +143,11 @@ comment :: CommentStyle -> T.Text -> T.Text comment (LineStyle linePrefix) = T.unlines . map ((linePrefix <> " ") <>) . T.lines comment (BlockStyle blockStart blockEnd) = (blockStart <>) . (<> blockEnd) -uncomment :: Language -> T.Text -> (CommentStyle, T.Text) +uncomment :: L.Language -> T.Text -> (CommentStyle, T.Text) uncomment language rawText = maybe - ( ( LineStyle (lineStart language), - stripLineComments (lineStart language) text + ( ( LineStyle (L.lineStart language), + stripLineComments (L.lineStart language) text ) ) ( \(blockStart, blockEnd) -> @@ -148,7 +156,7 @@ uncomment language rawText = ) ) $ do - (blockStarts, blockEnd) <- block language + (blockStarts, blockEnd) <- L.block language (,blockEnd) <$> find (`T.isPrefixOf` text) blockStarts where text = stripLines rawText diff --git a/app/Comment/Language.hs b/app/Comment/Language.hs index 925cf56..7a9963f 100644 --- a/app/Comment/Language.hs +++ b/app/Comment/Language.hs @@ -1,6 +1,8 @@ +{-# LANGUAGE DerivingStrategies #-} + module Comment.Language ( Language (..), - fromExtension, + fromPath, parser, lineStart, block, @@ -10,31 +12,54 @@ where import Control.Exception (throw) import Data.Binary (Binary) +import Data.Languages qualified as L +import Data.List.NonEmpty qualified as N +import Data.Maybe (fromMaybe) import Data.Text qualified as T import Exception qualified as E import Foreign.Ptr (Ptr) import GHC.Generics (Generic) import TreeSitter qualified as S -data Language - = Haskell - deriving (Eq, Show, Generic, Binary) +newtype Language = Language {languageKey :: L.LanguageKey} + deriving (Eq, Show, Generic) + deriving newtype (Binary) -fromExtension :: String -> Language -fromExtension ".hs" = Haskell -fromExtension ext = throw $ E.UnknownFileExtension ext +fromPath :: FilePath -> N.NonEmpty Language +fromPath fp = + fromMaybe (throw $ E.UnknownFile fp) + . N.nonEmpty + . map (Language . L.languageName) + $ L.languagesForPath fp --- TODO add languages elm, shell, nix +-- TODO Add support for all tree-sitter supported languages -- --- @supersedes add-support-for-all-tree-grepper-supported-files +-- @backlog parser :: Language -> Ptr S.Language -parser Haskell = S.tree_sitter_haskell +parser (Language "C") = S.tree_sitter_c +parser (Language "Elm") = S.tree_sitter_elm +parser (Language "Haskell") = S.tree_sitter_haskell +parser (Language "Nix") = S.tree_sitter_nix +parser (Language "Shell") = S.tree_sitter_bash +parser (Language {languageKey}) = throw (E.UnsupportedLanguage languageKey) lineStart :: Language -> T.Text -lineStart Haskell = "--" +lineStart (Language "C") = "//" +lineStart (Language "Elm") = "--" +lineStart (Language "Haskell") = "--" +lineStart (Language "Nix") = "#" +lineStart (Language "Shell") = "#" +lineStart (Language {languageKey}) = throw (E.UnsupportedLanguage languageKey) block :: Language -> Maybe ([T.Text], T.Text) -block Haskell = Just (["{-"], "-}") +block (Language "C") = Just (["/*"], "*/") +block (Language "Elm") = Just (["{-|", "{-"], "-}") +block (Language "Haskell") = Just (["{-"], "-}") +block (Language "Nix") = Just (["/*"], "*/") +block (Language "Shell") = Nothing +block (Language {languageKey}) = throw (E.UnsupportedLanguage languageKey) nodeTypes :: Language -> [String] -nodeTypes Haskell = ["comment"] +nodeTypes (Language "Haskell") = ["comment"] +nodeTypes (Language "Shell") = ["comment"] +nodeTypes (Language {languageKey}) = throw (E.UnsupportedLanguage languageKey) diff --git a/app/Exception.hs b/app/Exception.hs index 5428194..db7612b 100644 --- a/app/Exception.hs +++ b/app/Exception.hs @@ -3,16 +3,18 @@ module Exception InvalidTreeGrepperResult (..), NoCommits (..), ProcessException (..), - UnknownFileExtension (..), + UnknownFile (..), InvalidDiff (..), InvalidIssue (..), CannotReadFile (..), + UnsupportedLanguage (..), ) where import CMark qualified as D import Control.Exception import Data.ByteString.Lazy.Char8 as LB +import Data.Text qualified as T import Data.Void (Void) import System.Exit (ExitCode) import Text.Megaparsec qualified as P @@ -21,9 +23,10 @@ data AnyException = InvalidTreeGrepperResult' InvalidTreeGrepperResult | NoCommits' NoCommits | ProcessException' ProcessException - | UnknownFileExtension' UnknownFileExtension + | UnknownFile' UnknownFile | InvalidDiff' InvalidDiff | InvalidIssue' InvalidIssue + | UnsupportedLanguage' UnsupportedLanguage deriving (Show) instance Exception AnyException @@ -45,12 +48,12 @@ data ProcessException = ProcessException String ExitCode LB.ByteString instance Exception ProcessException -data UnknownFileExtension = UnknownFileExtension - { extension :: String +data UnknownFile = UnknownFile + { filePath :: FilePath } deriving (Show) -instance Exception UnknownFileExtension +instance Exception UnknownFile data InvalidDiff = InvalidDiff String deriving (Show) @@ -66,3 +69,8 @@ data CannotReadFile = CannotReadFile FilePath deriving (Show) instance Exception CannotReadFile + +data UnsupportedLanguage = UnsupportedLanguage T.Text + deriving (Show) + +instance Exception UnsupportedLanguage diff --git a/app/History.hs b/app/History.hs index a76e97b..c0427fa 100644 --- a/app/History.hs +++ b/app/History.hs @@ -11,7 +11,7 @@ import CMark qualified as D import Cache (cachedMaybe) import Comment qualified as G import Control.Arrow (first) -import Control.Exception (catch, handle, try) +import Control.Exception (Handler (..), catch, catches, try) import Data.Binary (Binary) import Data.ByteString.Lazy qualified as LB import Data.Digest.Pure.SHA qualified as S @@ -139,9 +139,12 @@ getIssuesAndFilesChanged commitHash = do -- | Get all issues in the given directory and file. getIssuesOfFile :: CommitHash -> FilePath -> IO [I.Issue] getIssuesOfFile commitHash filename = - handle (\(_ :: E.UnknownFileExtension) -> pure []) $ - fmap catMaybes . parMapM (fromComment commitHash) + ( fmap catMaybes . parMapM (fromComment commitHash) =<< G.getComments commitHash filename + ) + `catches` [ Handler \(_ :: E.UnknownFile) -> pure [], + Handler \(_ :: E.UnsupportedLanguage) -> pure [] + ] -- | Note that `provenance` is trivial and needs to be fixed up later. fromComment :: CommitHash -> G.Comment -> IO (Maybe I.Issue) diff --git a/app/TreeSitter.hs b/app/TreeSitter.hs index 230fefc..8f9e02d 100644 --- a/app/TreeSitter.hs +++ b/app/TreeSitter.hs @@ -3,7 +3,106 @@ module TreeSitter Node (..), Point (..), extract_comments, + tree_sitter_bash, + tree_sitter_beancount, + tree_sitter_bibtex, + tree_sitter_c, + tree_sitter_c_sharp, + tree_sitter_clojure, + tree_sitter_cmake, + tree_sitter_comment, + tree_sitter_commonlisp, + tree_sitter_cpp, + tree_sitter_css, + tree_sitter_cuda, + tree_sitter_dart, + tree_sitter_devicetree, + tree_sitter_dockerfile, + tree_sitter_dot, + tree_sitter_eex, + tree_sitter_elisp, + tree_sitter_elixir, + tree_sitter_elm, + tree_sitter_embedded_template, + tree_sitter_erlang, + tree_sitter_fennel, + tree_sitter_fish, + tree_sitter_fortran, + tree_sitter_gdscript, + tree_sitter_glimmer, + tree_sitter_glsl, + tree_sitter_go, + tree_sitter_godot_resource, + tree_sitter_gomod, + tree_sitter_gowork, + tree_sitter_graphql, tree_sitter_haskell, + tree_sitter_hcl, + tree_sitter_heex, + tree_sitter_hjson, + tree_sitter_html, + tree_sitter_http, + tree_sitter_janet_simple, + tree_sitter_java, + tree_sitter_javascript, + tree_sitter_jsdoc, + tree_sitter_json, + tree_sitter_json5, + tree_sitter_jsonnet, + tree_sitter_julia, + tree_sitter_kotlin, + tree_sitter_latex, + tree_sitter_ledger, + tree_sitter_llvm, + tree_sitter_lua, + tree_sitter_make, + tree_sitter_markdown, + tree_sitter_markdown_inline, + tree_sitter_nickel, + tree_sitter_nix, + tree_sitter_norg, + tree_sitter_nu, + tree_sitter_ocaml, + tree_sitter_ocaml_interface, + tree_sitter_perl, + tree_sitter_pgn, + tree_sitter_php, + tree_sitter_pioasm, + tree_sitter_prisma, + tree_sitter_pug, + tree_sitter_python, + tree_sitter_ql, + tree_sitter_ql_dbscheme, + tree_sitter_query, + tree_sitter_r, + tree_sitter_regex, + tree_sitter_rego, + tree_sitter_rst, + tree_sitter_ruby, + tree_sitter_rust, + tree_sitter_scala, + tree_sitter_scheme, + tree_sitter_scss, + tree_sitter_smithy, + tree_sitter_solidity, + tree_sitter_sparql, + tree_sitter_sql, + tree_sitter_supercollider, + tree_sitter_surface, + tree_sitter_svelte, + tree_sitter_tiger, + tree_sitter_tlaplus, + tree_sitter_toml, + tree_sitter_tsq, + tree_sitter_tsx, + tree_sitter_turtle, + tree_sitter_typescript, + tree_sitter_verilog, + tree_sitter_vim, + tree_sitter_vue, + tree_sitter_yaml, + tree_sitter_yang, + tree_sitter_zig, ) where @@ -57,4 +156,202 @@ foreign import ccall unsafe "extract_comments" Ptr CInt -> IO () +foreign import ccall unsafe "tree_sitter_bash" tree_sitter_bash :: Ptr Language + +foreign import ccall unsafe "tree_sitter_beancount" tree_sitter_beancount :: Ptr Language + +foreign import ccall unsafe "tree_sitter_bibtex" tree_sitter_bibtex :: Ptr Language + +foreign import ccall unsafe "tree_sitter_clojure" tree_sitter_clojure :: Ptr Language + +foreign import ccall unsafe "tree_sitter_cmake" tree_sitter_cmake :: Ptr Language + +foreign import ccall unsafe "tree_sitter_comment" tree_sitter_comment :: Ptr Language + +foreign import ccall unsafe "tree_sitter_commonlisp" tree_sitter_commonlisp :: Ptr Language + +foreign import ccall unsafe "tree_sitter_cpp" tree_sitter_cpp :: Ptr Language + +foreign import ccall unsafe "tree_sitter_c_sharp" tree_sitter_c_sharp :: Ptr Language + +foreign import ccall unsafe "tree_sitter_css" tree_sitter_css :: Ptr Language + +foreign import ccall unsafe "tree_sitter_c" tree_sitter_c :: Ptr Language + +foreign import ccall unsafe "tree_sitter_cuda" tree_sitter_cuda :: Ptr Language + +foreign import ccall unsafe "tree_sitter_dart" tree_sitter_dart :: Ptr Language + +foreign import ccall unsafe "tree_sitter_devicetree" tree_sitter_devicetree :: Ptr Language + +foreign import ccall unsafe "tree_sitter_dockerfile" tree_sitter_dockerfile :: Ptr Language + +foreign import ccall unsafe "tree_sitter_dot" tree_sitter_dot :: Ptr Language + +foreign import ccall unsafe "tree_sitter_eex" tree_sitter_eex :: Ptr Language + +foreign import ccall unsafe "tree_sitter_elisp" tree_sitter_elisp :: Ptr Language + +foreign import ccall unsafe "tree_sitter_elixir" tree_sitter_elixir :: Ptr Language + +foreign import ccall unsafe "tree_sitter_elm" tree_sitter_elm :: Ptr Language + +foreign import ccall unsafe "tree_sitter_embedded_template" tree_sitter_embedded_template :: Ptr Language + +foreign import ccall unsafe "tree_sitter_erlang" tree_sitter_erlang :: Ptr Language + +foreign import ccall unsafe "tree_sitter_fennel" tree_sitter_fennel :: Ptr Language + +foreign import ccall unsafe "tree_sitter_fish" tree_sitter_fish :: Ptr Language + +foreign import ccall unsafe "tree_sitter_fortran" tree_sitter_fortran :: Ptr Language + +foreign import ccall unsafe "tree_sitter_gdscript" tree_sitter_gdscript :: Ptr Language + +foreign import ccall unsafe "tree_sitter_glimmer" tree_sitter_glimmer :: Ptr Language + +foreign import ccall unsafe "tree_sitter_glsl" tree_sitter_glsl :: Ptr Language + +foreign import ccall unsafe "tree_sitter_godot_resource" tree_sitter_godot_resource :: Ptr Language + +foreign import ccall unsafe "tree_sitter_gomod" tree_sitter_gomod :: Ptr Language + +foreign import ccall unsafe "tree_sitter_go" tree_sitter_go :: Ptr Language + +foreign import ccall unsafe "tree_sitter_gowork" tree_sitter_gowork :: Ptr Language + +foreign import ccall unsafe "tree_sitter_graphql" tree_sitter_graphql :: Ptr Language + foreign import ccall unsafe "tree_sitter_haskell" tree_sitter_haskell :: Ptr Language + +foreign import ccall unsafe "tree_sitter_hcl" tree_sitter_hcl :: Ptr Language + +foreign import ccall unsafe "tree_sitter_heex" tree_sitter_heex :: Ptr Language + +foreign import ccall unsafe "tree_sitter_hjson" tree_sitter_hjson :: Ptr Language + +foreign import ccall unsafe "tree_sitter_html" tree_sitter_html :: Ptr Language + +foreign import ccall unsafe "tree_sitter_http" tree_sitter_http :: Ptr Language + +foreign import ccall unsafe "tree_sitter_janet_simple" tree_sitter_janet_simple :: Ptr Language + +foreign import ccall unsafe "tree_sitter_javascript" tree_sitter_javascript :: Ptr Language + +foreign import ccall unsafe "tree_sitter_java" tree_sitter_java :: Ptr Language + +foreign import ccall unsafe "tree_sitter_jsdoc" tree_sitter_jsdoc :: Ptr Language + +foreign import ccall unsafe "tree_sitter_json5" tree_sitter_json5 :: Ptr Language + +foreign import ccall unsafe "tree_sitter_jsonnet" tree_sitter_jsonnet :: Ptr Language + +foreign import ccall unsafe "tree_sitter_json" tree_sitter_json :: Ptr Language + +foreign import ccall unsafe "tree_sitter_julia" tree_sitter_julia :: Ptr Language + +foreign import ccall unsafe "tree_sitter_kotlin" tree_sitter_kotlin :: Ptr Language + +foreign import ccall unsafe "tree_sitter_latex" tree_sitter_latex :: Ptr Language + +foreign import ccall unsafe "tree_sitter_ledger" tree_sitter_ledger :: Ptr Language + +foreign import ccall unsafe "tree_sitter_llvm" tree_sitter_llvm :: Ptr Language + +foreign import ccall unsafe "tree_sitter_lua" tree_sitter_lua :: Ptr Language + +foreign import ccall unsafe "tree_sitter_make" tree_sitter_make :: Ptr Language + +foreign import ccall unsafe "tree_sitter_markdown_inline" tree_sitter_markdown_inline :: Ptr Language + +foreign import ccall unsafe "tree_sitter_markdown" tree_sitter_markdown :: Ptr Language + +foreign import ccall unsafe "tree_sitter_nickel" tree_sitter_nickel :: Ptr Language + +foreign import ccall unsafe "tree_sitter_nix" tree_sitter_nix :: Ptr Language + +foreign import ccall unsafe "tree_sitter_norg" tree_sitter_norg :: Ptr Language + +foreign import ccall unsafe "tree_sitter_nu" tree_sitter_nu :: Ptr Language + +foreign import ccall unsafe "tree_sitter_ocaml_interface" tree_sitter_ocaml_interface :: Ptr Language + +foreign import ccall unsafe "tree_sitter_ocaml" tree_sitter_ocaml :: Ptr Language + +foreign import ccall unsafe "tree_sitter_perl" tree_sitter_perl :: Ptr Language + +foreign import ccall unsafe "tree_sitter_pgn" tree_sitter_pgn :: Ptr Language + +foreign import ccall unsafe "tree_sitter_php" tree_sitter_php :: Ptr Language + +foreign import ccall unsafe "tree_sitter_pioasm" tree_sitter_pioasm :: Ptr Language + +foreign import ccall unsafe "tree_sitter_prisma" tree_sitter_prisma :: Ptr Language + +foreign import ccall unsafe "tree_sitter_pug" tree_sitter_pug :: Ptr Language + +foreign import ccall unsafe "tree_sitter_python" tree_sitter_python :: Ptr Language + +foreign import ccall unsafe "tree_sitter_dbscheme" tree_sitter_ql_dbscheme :: Ptr Language + +foreign import ccall unsafe "tree_sitter_ql" tree_sitter_ql :: Ptr Language + +foreign import ccall unsafe "tree_sitter_query" tree_sitter_query :: Ptr Language + +foreign import ccall unsafe "tree_sitter_regex" tree_sitter_regex :: Ptr Language + +foreign import ccall unsafe "tree_sitter_rego" tree_sitter_rego :: Ptr Language + +foreign import ccall unsafe "tree_sitter_rst" tree_sitter_rst :: Ptr Language + +foreign import ccall unsafe "tree_sitter_r" tree_sitter_r :: Ptr Language + +foreign import ccall unsafe "tree_sitter_ruby" tree_sitter_ruby :: Ptr Language + +foreign import ccall unsafe "tree_sitter_rust" tree_sitter_rust :: Ptr Language + +foreign import ccall unsafe "tree_sitter_scala" tree_sitter_scala :: Ptr Language + +foreign import ccall unsafe "tree_sitter_scheme" tree_sitter_scheme :: Ptr Language + +foreign import ccall unsafe "tree_sitter_scss" tree_sitter_scss :: Ptr Language + +foreign import ccall unsafe "tree_sitter_smithy" tree_sitter_smithy :: Ptr Language + +foreign import ccall unsafe "tree_sitter_solidity" tree_sitter_solidity :: Ptr Language + +foreign import ccall unsafe "tree_sitter_sparql" tree_sitter_sparql :: Ptr Language + +foreign import ccall unsafe "tree_sitter_sql" tree_sitter_sql :: Ptr Language + +foreign import ccall unsafe "tree_sitter_supercollider" tree_sitter_supercollider :: Ptr Language + +foreign import ccall unsafe "tree_sitter_surface" tree_sitter_surface :: Ptr Language + +foreign import ccall unsafe "tree_sitter_svelte" tree_sitter_svelte :: Ptr Language + +foreign import ccall unsafe "tree_sitter_tiger" tree_sitter_tiger :: Ptr Language + +foreign import ccall unsafe "tree_sitter_tlaplus" tree_sitter_tlaplus :: Ptr Language + +foreign import ccall unsafe "tree_sitter_toml" tree_sitter_toml :: Ptr Language + +foreign import ccall unsafe "tree_sitter_tsq" tree_sitter_tsq :: Ptr Language + +foreign import ccall unsafe "tree_sitter_tsx" tree_sitter_tsx :: Ptr Language + +foreign import ccall unsafe "tree_sitter_turtle" tree_sitter_turtle :: Ptr Language + +foreign import ccall unsafe "tree_sitter_typescript" tree_sitter_typescript :: Ptr Language + +foreign import ccall unsafe "tree_sitter_verilog" tree_sitter_verilog :: Ptr Language + +foreign import ccall unsafe "tree_sitter_vim" tree_sitter_vim :: Ptr Language + +foreign import ccall unsafe "tree_sitter_vue" tree_sitter_vue :: Ptr Language + +foreign import ccall unsafe "tree_sitter_yaml" tree_sitter_yaml :: Ptr Language + +foreign import ccall unsafe "tree_sitter_yang" tree_sitter_yang :: Ptr Language + +foreign import ccall unsafe "tree_sitter_zig" tree_sitter_zig :: Ptr Language -- cgit v1.2.3