forked from blitzcode/hackage-diff
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Main.hs
636 lines (602 loc) · 34.4 KB
/
Main.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
{-# LANGUAGE LambdaCase
, ScopedTypeVariables
, OverloadedStrings
, RecordWildCards #-}
module Main (main) where
import System.Exit
import System.Directory
import System.Environment
import System.FilePath
import System.Process
import System.Console.GetOpt
import System.Console.ANSI
import Control.Monad
import Control.Monad.Except
import Control.Monad.State
import Control.Applicative
import Control.Exception
import Control.Concurrent.Async
import Data.Function
import Data.List
import Data.Either
import Data.Char
import qualified Data.Text as T
import qualified Data.Text.IO as TI
import Data.Attoparsec.Text hiding (try)
import Data.Attoparsec.Combinator (lookAhead)
import Text.Printf
import Distribution.PackageDescription
import Distribution.Verbosity (normal)
import Distribution.Simple.Utils (findPackageDesc)
import Distribution.ModuleName (toFilePath, components)
import Language.Haskell.Exts as E
import Language.Preprocessor.Cpphs
import Network.HTTP
import Distribution.Simple.PackageDescription (readGenericPackageDescription)
main :: IO ()
main = do
-- Process command line arguments
(pkgName, argVerA, argVerB, flags) <-
runExcept <$> (getCmdOpt <$> getProgName <*> getArgs) >>= either die return
when (argVerA == argVerB) $
die "Need to specify different versions / packages for comparison"
mode <- case foldr (\f r -> case f of FlagMode m -> m; _ -> r) "downloaddb" flags of
"downloaddb" -> return ModeDownloadDB
"builddb" -> return ModeBuildDB
"parsehs" -> return ModeParseHS
m -> die $ printf "'%s' is not a valid mode" m
let disableColor = FlagDisableColor `elem` flags
silentFlag = FlagSilent `elem` flags
-- Did we get a package version, DB path or package path?
([verA, verB] :: [EitherVerPath]) <- forM [argVerA, argVerB] $ \ver ->
case parseOnly pkgVerParser (T.pack ver) of
-- Not a version, check if we got a valid DB file or package path
Left _ | mode == ModeParseHS -> do
flip unless (die $ errHdr ++ " or package path" ) =<< doesDirectoryExist ver
return $ Right ver
| otherwise -> do
flip unless (die $ errHdr ++ " or database path") =<< doesFileExist ver
return $ Right ver
where errHdr = "'" ++ ver ++ "' is not a valid version string (1.0[.0[.0]])"
-- Looks like a valid version string
Right _ -> return $ Left ver
diff <- withTmpDirectory $ \tmpDir -> do
-- Need to download packages?
when (mode `elem` [ModeBuildDB, ModeParseHS]) $
forM_ (lefts [verA, verB]) $ \verString -> do
let pkg = pkgName ++ "-" ++ verString
unless silentFlag . putStrLn $ "Downloading " ++ pkg ++ "..."
runExceptT (downloadPackage pkg tmpDir) >>= either die return
-- Parse, compute difference
either die return =<<
( runExceptT $
let cp = ComputeParams tmpDir pkgName verA verB silentFlag
in case mode of
ModeDownloadDB -> computeDiffDownloadHoogleDB cp
ModeBuildDB -> computeDiffBuildHoogleDB cp
ModeParseHS -> computeDiffParseHaskell cp
)
-- Output results
unless silentFlag $ printf "\n--- Diff for | %s → %s | ---\n\n"
(either id id verA)
(either id id verB)
outputDiff diff disableColor silentFlag
data FlagMode = ModeDownloadDB | ModeBuildDB | ModeParseHS
deriving (Eq)
data CmdFlag = FlagDisableColor | FlagSilent | FlagMode String
deriving (Eq)
getCmdOpt :: String -> [String] -> Except String (String, String, String, [CmdFlag])
getCmdOpt prgName args =
case getOpt RequireOrder opt args of
(flags, (pkgName:verA:verB:[]), []) -> return (pkgName, verA, verB, flags)
(_, _, []) -> throwError usage
(_, _, err) -> throwError (concat err ++ "\n" ++ usage)
where
header =
"hackage-diff | Compare the public API of different versions of a Hackage library\n" ++
"github.com/blitzcode/hackage-diff | www.blitzcode.net | (C) 2016 Tim C. Schroeder\n\n" ++
"Usage: " ++ prgName ++ " [options] <package-name> <old-version|path> <new-version|path>"
footer =
"\nExamples:\n" ++
" " ++ prgName ++ " mtl 2.1 2.2.1\n" ++
" " ++ prgName ++ " --mode=builddb JuicyPixels 3.1.4.1 3.1.5.2\n" ++
" " ++ prgName ++ " conduit 1.1.5 ~/tmp/conduit-1.1.6/dist/doc/html/conduit/conduit.txt\n" ++
" " ++ prgName ++ " --mode=parsehs QuickCheck 2.6 2.7.6\n" ++
" " ++ prgName ++ " --mode=parsehs -s Cabal ~/tmp/Cabal-1.18.0/ 1.20.0.0\n"
usage = usageInfo header opt ++ footer
opt = [ Option []
["mode"]
(ReqArg FlagMode "[downloaddb|builddb|parsehs]")
( "what to download / read, how to compare\n" ++
" downloaddb - download Hoogle DBs and diff (Default)\n" ++
" builddb - download packages, build Hoogle DBs and diff\n" ++
" parsehs - download packages, directly diff .hs exports"
)
, Option ['c']
["disable-color"]
(NoArg FlagDisableColor)
"disable color output"
, Option ['s']
["silent"]
(NoArg FlagSilent)
"disable progress output"
]
-- Check a package version string (1.0[.0[.0]])
pkgVerParser :: Parser ()
pkgVerParser = (nDigits 4 <|> nDigits 3 <|> nDigits 2) *> endOfInput
where digitInt = void (decimal :: Parser Int)
nDigits n = count (n - 1) (digitInt *> char '.') *> digitInt
-- Create and clean up temporary working directory
withTmpDirectory :: (FilePath -> IO a) -> IO a
withTmpDirectory = bracket
( do sysTmpDir <- getTemporaryDirectory
let tmpDir = addTrailingPathSeparator $ sysTmpDir </> "hackage-diff"
createDirectoryIfMissing True tmpDir
return tmpDir
)
( removeDirectoryRecursive )
cabalInstall :: [String] -> ExceptT String IO ()
cabalInstall args = do
(cabalExit, _, cabalStdErr) <- liftIO $ readProcessWithExitCode "cabal" args []
unless (cabalExit == ExitSuccess) . throwError $ cabalStdErr
-- Use cabal-install to download a package from hackage
downloadPackage :: String -> FilePath -> ExceptT String IO ()
downloadPackage pkg destination = cabalInstall [ "get", pkg, "--destdir=" ++ destination ]
data ExportCmp = EAdded | ERemoved | EModified T.Text {- Old signature -} | EUnmodified
deriving (Show, Eq, Ord)
data ModuleCmp = MAdded [T.Text] -- Module was added
| MAddedParseError -- Like above, but we couldn't parse the new one
| MRemoved [T.Text] -- Module was removed
| MRemovedParseError -- Like above, but we couldn't parse the old one
| MNotSureIfModifiedParseError -- New and/or old didn't parse, can't tell
| MModified [(ExportCmp, T.Text)] -- Modified
| MUnmodifed -- Changed
deriving (Show, Eq, Ord)
type Diff = [(ModuleCmp, T.Text)]
-- Print out the computed difference, optionally with ANSI colors
outputDiff :: Diff -> Bool -> Bool -> IO ()
outputDiff diff disableColor disableLengend = do
let putStrCol color str
| disableColor = liftIO $ TI.putStr str
| otherwise = liftIO . TI.putStr $ T.pack (setSGRCode [SetColor Foreground Vivid color]) <>
str <> T.pack (setSGRCode [Reset])
putStrLnCol color str = liftIO $ putStrCol color str >> putStrLn ""
breakingChanges <- flip execStateT (0 :: Int) . forM_ diff $ \case
(MAdded exps , mname) -> do
putStrLnCol Green $ "+ " <> mname
mapM_ (\e -> putStrLnCol Green . T.pack $ printf " + %s" (T.unpack e)) exps
(MAddedParseError , mname) ->
putStrLnCol Green . T.pack $
printf " + %s (ERROR: failed to parse new version, exports not available)" mname
(MRemoved exps , mname) -> do
putStrLnCol Red $ "- " <> mname
mapM_ (\e -> modify' (+ 1) >> putStrLnCol Red (T.pack (printf " - %s" (T.unpack e)))) exps
(MRemovedParseError , mname) -> do
modify' (+ 1)
putStrLnCol Red $
" - " <> mname <> " (ERROR: failed to parse old version, exports not available)"
(MNotSureIfModifiedParseError, mname) -> do
putStrLnCol Yellow $ "× " <> mname <>
" (Potentially modified, ERROR: failed to parse new and/or old version)"
(MModified exps , mname) -> do
putStrLnCol Yellow $ "× " <> mname
forM_ exps $ \(cmp, expname) -> case cmp of
EAdded -> putStrLnCol Green $ " + " <> expname
ERemoved -> do modify' (+ 1)
putStrLnCol Red $ " - " <> expname
EModified old -> do modify' (+ 1)
putStrLnCol Yellow $ " × New: " <> expname <> "\n" <>
" Old: " <> old
EUnmodified -> return ()
(MUnmodifed , mname) -> putStrLnCol White $ "· " <> mname
unless disableLengend $ do
putStrLn ""
putStrCol Green "[+ Added] "
putStrCol Red "[- Removed] "
putStrCol Yellow "[× Modified] "
putStrCol White "[· Unmodified]\n"
unless (breakingChanges == 0) $
putStrLnCol Red . T.pack $ printf "\n%i potential breaking changes found" breakingChanges
-- All the parameters required by the various compute* functions that actually prepare the
-- data and compute the difference
data ComputeParams = ComputeParams { cpTmpDir :: FilePath
, cpPackage :: String
, cpVerA :: EitherVerPath
, cpVerB :: EitherVerPath
, cpSilentFlag :: Bool
} deriving (Eq, Show)
-- A package can be specified by a version string, a Hoogle DB file path or a package path
type VersionString = String
type EitherVerPath = Either VersionString FilePath
-- Compute a Diff by comparing the package's Hoogle DB read from disk or downloaded from Hackage
computeDiffDownloadHoogleDB :: ComputeParams -> ExceptT String IO Diff
computeDiffDownloadHoogleDB ComputeParams { .. } = do
-- Get Hoogle databases
putS "Downloading / Reading Hoogle DBs..."
(dbA, dbB) <-
either (\(e :: IOException) -> throwError $ "DB Error: " ++ show e ++ tip) return =<<
(liftIO . try $ concurrently (downloadOrRead cpVerA) (downloadOrRead cpVerB))
-- Parse
putS "Parsing Hoogle DBs..."
[parsedDBA, parsedDBB] <- forM [dbA, dbB] $ \db ->
either throwError return $ parseOnly (hoogleDBParser <* endOfInput) db
-- Debug parser in GHCi: parseOnly hoogleDBParser <$> TI.readFile "base.txt" >>=
-- \(Right db) -> mapM_ (putStrLn . show) db
-- Compare
putS "Comparing Hoogle DBs..."
return $ diffHoogleDB parsedDBA parsedDBB
where getHoogleDBURL ver = "http://hackage.haskell.org/package" </> cpPackage ++ "-" ++ ver </>
"docs" </> cpPackage <.> "txt"
-- Network.HTTP is kinda crummy, but pulling in http-client/conduit
-- just for downloading two small text files is probably not worth it
downloadURL url = T.pack <$> do
req <- simpleHTTP (getRequest url)
-- HTTP will throw an IOException for any connection error,
-- also examine the response code and throw one for every
-- non-200 one we get
code <- getResponseCode req
unless (code == (2, 0, 0)) . throwIO . userError $
"Status code " ++ show code ++ " for request " ++ url
getResponseBody req
tip = "\nYou can try building missing Hoogle DBs yourself by running with --mode=builddb"
putS = unless cpSilentFlag . liftIO . putStrLn
downloadOrRead = either (downloadURL . getHoogleDBURL) (TI.readFile)
-- Compute a Diff by comparing the package's Hoogle DB build through Haddock. Unfortunately,
-- running Haddock requires to have the package configured with all dependencies
-- installed. This can often be very slow and frequently fails for older packages, on top
-- of any Haddock failures that might happen
computeDiffBuildHoogleDB :: ComputeParams -> ExceptT String IO Diff
computeDiffBuildHoogleDB ComputeParams { .. } =
flip catchError (\e -> throwError $ e ++ tip) $ do
forM_ (lefts [cpVerA, cpVerB]) $ \ver -> do -- Only build if we don't have a DB file path
let pkg = cpPackage ++ "-" ++ ver
putS $ "Processing " ++ pkg ++ "..."
-- TODO: This is rather ugly. Cabal does not allow us to specify the target
-- directory, and the current directory is not a per-thread property.
-- While createProcess allows the specification of a working directory, our
-- preferred wrapper readProcessWithExitCode does not expose that.
-- Duplicating that function and its web of private helpers here would be
-- quite some overhead. For now we simply change the working directory of
-- the process
--
-- https://ghc.haskell.org/trac/ghc/ticket/9322#ticket
--
liftIO . setCurrentDirectory $ cpTmpDir </> pkg
-- All the steps required to get the Hoogle DB
putS " Configuring" >> cabalInstall [ "configure" ]
putS " Building Haddock" >> cabalInstall [ "haddock", "--disable-optimization", "-j", "--ghc-option=-O0", "--ghc-option=-j", "--haddock-hoogle" ]
-- Read DBs from disk
[dbA, dbB] <-
forM [cpVerA, cpVerB] $ \ver ->
(liftIO . try . TI.readFile $ either getHoogleDBPath id ver)
>>= either (\(e :: IOException) -> throwError $ show e) return
-- Parse
[parsedDBA, parsedDBB] <- forM [dbA, dbB] $ \db ->
either throwError return $ parseOnly hoogleDBParser db
-- Compare
return $ diffHoogleDB parsedDBA parsedDBB
where
putS = unless cpSilentFlag . liftIO . putStrLn
getHoogleDBPath ver = cpTmpDir </> cpPackage ++ "-" ++ ver </> "dist/doc/html" </>
cpPackage </> cpPackage <.> "txt"
tip = "\nIf downloading / building Hoogle DBs fails, you can try directly parsing " ++
"the source files by running with --mode=parsehs"
-- Compare two packages made up of readily parsed Hoogle DBs
diffHoogleDB :: [DBEntry] -> [DBEntry] -> Diff
diffHoogleDB dbA dbB = do
let [verA, verB] = flip map [dbA, dbB]
( -- Sort exports by name
map (\(nm, exps) -> (nm, sortBy (compare `on` dbeName) exps))
-- Sort modules by name
. sortBy (compare `on` fst)
-- Extract module name, put into (name, exports) pair
. map (\case ((DBModule nm):exps) -> (nm , exps)
exps -> ("(Unknown)", exps)
)
-- Group by module
. groupBy (\a b -> or $ (\case DBModule _ -> False
_ -> True
) <$> [a, b]
)
-- Filter out comments and package information
. filter (\case (DBPkgInfo _ _) -> False
(DBComment _ ) -> False
_ -> True
)
)
modulesAdded = allANotInBBy ((==) `on` fst) verB verA
modulesRemoved = allANotInBBy ((==) `on` fst) verA verB
modulesKept = intersectBy ((==) `on` fst) verA verB
resAdded = flip map modulesAdded $ \(nm, exps) ->
(MAdded . map unparseDBEntry $ exps, nm)
resRemoved = flip map modulesRemoved $ \(nm, exps) ->
(MRemoved . map unparseDBEntry $ exps, nm)
resKept =
sortBy compareKept . flip map modulesKept $ \(mname, modA') ->
-- Did the exports change?
case (modA', snd <$> find ((== mname) . fst) verB) of
(_ , Nothing ) -> -- This really should not ever happen here
(MNotSureIfModifiedParseError, mname)
(modA, Just modB)
| didExpChange -> (MModified expCmp , mname)
| otherwise -> (MUnmodifed , mname)
where -- Which exports were added / removed / modified?
didExpChange = or $ map (\case (EUnmodified, _) -> False; _ -> True) expCmp
expCmp = expAdded ++ expRemoved ++ expKept
expAdded =
[(EAdded , unparseDBEntry x) | x <- allANotInBBy compareDBEName modB modA]
expRemoved =
[(ERemoved, unparseDBEntry x) | x <- allANotInBBy compareDBEName modA modB]
expKept =
-- We don't sort by modified / unmodified here as we currently
-- don't list the unmodified ones
flip map (intersectBy compareDBEName modA modB) $ \eOld ->
case find (compareDBEName eOld) modB of
Nothing -> error "intersectBy / find is broken..."
Just eNew | compareDBEType eOld eNew ->
(EUnmodified, unparseDBEntry eOld)
| otherwise ->
(EModified $ unparseDBEntry eOld, unparseDBEntry eNew)
-- Sort everything by modification type, but make sure we sort
-- modified modules by their name, not their export list
compareKept a b = case (a, b) of
((MModified _, nameA), (MModified _, nameB)) -> compare nameA nameB
_ -> compare a b
in resAdded <> resRemoved <> resKept
-- Stupid helper to build module / export lists. Should probably switch to using
-- Data.Set for all of these operations to stop having O(n*m) everywhere
allANotInBBy :: (a -> a -> Bool) -> [a] -> [a] -> [a]
allANotInBBy f a b = filter (\m -> not $ any (f m) b) a
data DBEntry = DBModule !T.Text
| DBPkgInfo !T.Text !T.Text
| DBComment !T.Text
| DBType !T.Text !T.Text
| DBNewtype !T.Text !T.Text
| DBData !T.Text !T.Text
| DBPattern !T.Text !T.Text
| DBCtor !T.Text !T.Text
| DBClass !T.Text !T.Text
| DBInstance !T.Text !T.Text
| DBFunction !T.Text !T.Text
deriving (Eq, Show)
-- When comparing names we have to take the kind of the export into account, i.e.
-- type and value constructors may have the same name without being identical
compareDBEName :: DBEntry -> DBEntry -> Bool
compareDBEName a b = case (a, b) of
(DBModule _ , DBModule _ ) -> cmp; (DBPkgInfo _ _ , DBPkgInfo _ _ ) -> cmp;
(DBComment _ , DBComment _ ) -> cmp; (DBType _ _ , DBType _ _ ) -> cmp;
(DBNewtype _ _ , DBNewtype _ _ ) -> cmp; (DBData _ _ , DBData _ _ ) -> cmp;
(DBCtor _ _ , DBCtor _ _ ) -> cmp; (DBClass _ _ , DBClass _ _ ) -> cmp;
(DBInstance _ _, DBInstance _ _) -> cmp; (DBFunction _ _, DBFunction _ _) -> cmp;
_ -> False
where cmp = ((==) `on` dbeName) a b
-- Compare the type of two entries. If we simply compare the type string, we will
-- have mistakes like classifying those two functions as having a change in type:
--
-- func :: Num a => a -> a
-- func :: (Num a) => a -> a
--
-- So we try to parse the type with haskell-src-exts and then fall back on a string
-- compare if that fails. Parsing again every time the comparison function is called is
-- obviously rather slow, but it hasn't been an issue so far
--
-- TODO: We should do a name normalization pass on the parsed type, otherwise
-- 'id :: a -> a' and 'id :: b -> b' will be reported as different
--
compareDBEType :: DBEntry -> DBEntry -> Bool
compareDBEType a b =
-- We assume that a and b are the same kind of export (i.e. they have already been
-- matched with dbeName, which only compares exports of the same kind), and now we
-- want to know if the type differs between them
case a of
-- The syntax we use to list exported Ctors and their types can't be parsed as a
-- declaration, just compare the type part
DBCtor _ _ -> case ( parseTypeWithMode mode . T.unpack $ dbeType a
, parseTypeWithMode mode . T.unpack $ dbeType b
) of
(E.ParseOk resA, E.ParseOk resB) -> resA == resB
_ -> stringTypeCmp
-- Also can't parse our type / newtype syntax, fall back to string compare
DBType _ _ -> stringTypeCmp
DBNewtype _ _ -> stringTypeCmp
-- Parse everything else in its entirety as a top-level declaration
_ -> case ( parseDeclWithMode mode . T.unpack $ unparseDBEntry a
, parseDeclWithMode mode . T.unpack $ unparseDBEntry b
) of
(E.ParseOk resA, E.ParseOk resB) -> resA == resB
_ -> stringTypeCmp
where mode = -- Enable some common extension to make parsing more likely to succeed
defaultParseMode
{
extensions = [ EnableExtension FunctionalDependencies
, EnableExtension MultiParamTypeClasses
, EnableExtension TypeOperators
, EnableExtension KindSignatures
, EnableExtension MagicHash
, EnableExtension FlexibleContexts
]
}
stringTypeCmp = ((==) `on` dbeType) a b
-- Extract a database entry's "name" (i.e. a function name vs its type)
dbeName :: DBEntry -> T.Text
dbeName = \case
DBModule nm -> nm; DBPkgInfo k _ -> k ; DBComment _ -> "";
DBType nm _ -> nm; DBNewtype nm _ -> nm; DBData nm _ -> nm;
DBPattern nm _ -> nm; DBCtor nm _ -> nm; DBClass nm _ -> nm;
DBInstance nm _ -> nm; DBFunction nm _ -> nm
-- Extract a database entry's "type" (i.e. a function type vs its name)
dbeType :: DBEntry -> T.Text
dbeType = \case
DBModule _ -> ""; DBPkgInfo _ v -> v ; DBComment _ -> "";
DBType _ ty -> ty; DBNewtype _ ty -> ty; DBData _ ty -> ty;
DBPattern _ ty -> ty; DBCtor _ ty -> ty; DBClass _ ty -> ty;
DBInstance _ ty -> ty; DBFunction _ ty -> ty
unparseDBEntry :: DBEntry -> T.Text
unparseDBEntry = \case
DBModule nm -> "module " <> nm
DBPkgInfo k v -> "@" <> k <> v
DBComment txt -> "-- " <> txt
DBType nm ty -> "type " <> nm <> " " <> ty
DBNewtype nm ty -> "newtype " <> nm <> " " <> ty
DBData nm ty -> "data " <> nm <> (if T.null ty then "" else " " <> ty)
DBPattern nm ty -> "pattern " <> nm <> " :: " <> ty
DBCtor nm ty -> nm <> " :: " <> ty
DBClass _ ty -> "class " <> ty
DBInstance _ ty -> "instance " <> ty
DBFunction nm ty -> nm <> " :: " <> ty
-- Parse a Hoogle text database
hoogleDBParser :: Parser [DBEntry]
hoogleDBParser = many parseLine
where
parseLine = (*>) skipEmpty $ parseComment <|> parseData <|> parsePattern <|>
parsePkgInfo <|> parseDBModule <|> parseCtor <|>
parseNewtype <|> parseDBType <|> parseClass <|>
parseInstance <|> parseFunction
parseComment = string "-- " *> (DBComment <$> tillEoL)
parsePkgInfo = char '@' *> (DBPkgInfo <$> takeTill (== ' ') <*> tillEoL)
parseData = string "data " *>
( (DBData <$> takeTill (`elem` [ ' ', '\n' ]) <* endOfLine <*> "") <|>
(DBData <$> takeTill (== ' ') <* skipSpace <*> tillEoL)
)
parsePattern = string "pattern " *>
(DBPattern <$> takeTill (== ' ') <* skipSpace <* string "::" <*> tillEoL)
parseNewtype = string "newtype " *>
( (DBNewtype <$> takeTill (`elem` [ ' ', '\n' ]) <* endOfLine <*> "") <|>
(DBNewtype <$> takeTill (== ' ') <* skipSpace <*> tillEoL)
)
-- TODO: At some point Hoogle DBs started to have Ctors and functions
-- names wrapped in brackets. Not sure what's up with that, just
-- parse them as part of the name so the parser doesn't stop
parseCtor = do void . lookAhead $ satisfy isAsciiUpper <|>
(char '[' *> satisfy isAsciiUpper)
DBCtor <$> takeTill (== ' ') <* string " :: " <*> tillEoL
-- TODO: This doesn't parse function lists correctly
parseFunction = do void . lookAhead $ satisfy isAsciiLower <|> char '[' <|> char '('
DBFunction <$> takeTill (== ' ') <* string " :: " <*> tillEoL
parseInstance = do void $ string "instance "
line <- T.words <$> tillEoL
-- The name of an instance is basically everything
-- after the typeclass requirements
let nm = case break (== "=>") line of
(xs, []) -> T.unwords xs
(_, (_:xs)) -> T.unwords xs
return . DBInstance nm $ T.unwords line
parseClass = do void $ string "class "
line <- T.words <$> tillEoL
let nm = case break (== "=>") line of
((n:_), []) -> n
(_, (_:n:_)) -> n
_ -> ""
-- TODO: Sometimes typeclasses have all their default method
-- implementations listed right after the 'where' part,
-- just cut all of this off for now
trunc = fst . break (== "where") $ line
in return . DBClass nm $ T.unwords trunc
parseDBType = string "type " *> (DBType <$> takeTill (== ' ') <* skipSpace <*> tillEoL)
parseDBModule = string "module " *> (DBModule <$> takeTill (== '\n')) <* endOfLine
skipEmpty = many endOfLine
tillEoL = takeTill (== '\n') <* endOfLine
-- Compute a Diff by processing Haskell files directly. We use the Cabal API to locate and
-- parse the package .cabal file, extract a list of modules from it, and then pre-process
-- each module with cpphs and finally parse it with haskell-src-exts. The principal issue
-- with this approach is the often complex use of the CPP inside Haskell packages, making
-- this fail fairly often. This method also currently does not look at type signatures and
-- has various other limitations, like not working with modules that do not have an
-- export list
computeDiffParseHaskell :: ComputeParams -> ExceptT String IO Diff
computeDiffParseHaskell ComputeParams { .. } = do
[mListA, mListB] <- forM [cpVerA, cpVerB] $ \ver -> do
let pkgPath = either (\v -> cpTmpDir </> cpPackage ++ "-" ++ v) id ver
unless cpSilentFlag . liftIO . putStrLn $ "Processing " ++ pkgPath ++ "..."
-- Find .cabal file
dotCabal <- (liftIO . findPackageDesc $ pkgPath) >>= either throwError return
-- Parse .cabal file, extract exported modules
exports <- condLibrary <$> (liftIO $ readGenericPackageDescription normal dotCabal) >>= \case
Nothing -> throwError $ pkgPath ++ " is not a library"
Just node -> return $ exposedModules . condTreeData $ node
-- Build module name / module source file list
--
-- TODO: Some packages have a more complex source structure, need to look at the
-- cabal file some more to locate the files
let modules = flip map exports $
\m -> ( concat . intersperse "." . components $ m
, pkgPath </> toFilePath m <.> "hs" -- TODO: Also .lhs?
)
-- Parse modules
liftIO . forM modules $ \(modName, modPath) -> do
unless cpSilentFlag . putStrLn $ " Parsing " ++ modName
Main.parseModule modPath >>= either
-- Errors only affecting single modules are recoverable, just
-- print them instead of throwing
(\e -> putStrLn (" " ++ e) >> return (T.pack modName, Nothing))
(\r -> return (T.pack modName, Just r ))
-- Compute difference
return $ comparePackageModules mListA mListB
-- Parse a Haskell module interface using haskell-src-exts and cpphs
parseModule :: FilePath -> IO (Either String (Module SrcSpanInfo))
parseModule modPath = runExceptT $ do
(liftIO $ doesFileExist modPath) >>= flip unless
(throwError $ "Can't open source file '" ++ modPath ++ "'")
-- Run cpphs as pre-processor over our module
--
-- TODO: This obviously doesn't have the same defines and include paths set like
-- when compiling with GHC, major source of failures right now
modSrcCPP <- liftIO $ readFile modPath >>= runCpphs defaultCpphsOptions modPath
-- Parse pre-processed Haskell source. This pure parsing function unfortunately throws
-- exceptions for things like encountering an '#error' directive in the code, so we
-- also have to handle those as well
(liftIO . try . evaluate $
parseFileContentsWithMode defaultParseMode { parseFilename = modPath } modSrcCPP)
>>= \case Left (e :: ErrorCall) ->
throwError $ "Haskell Parse Exception - " ++ show e
Right (E.ParseFailed (SrcLoc fn ln cl) err) ->
throwError $ printf "Haskell Parse Error - %s:%i:%i: %s" fn ln cl err
Right (E.ParseOk parsedModule) ->
return parsedModule
type PackageModuleList = [(T.Text, Maybe (Module SrcSpanInfo))]
-- Compare two packages made up of readily parsed Haskell modules
comparePackageModules :: PackageModuleList -> PackageModuleList -> Diff
comparePackageModules verA verB = do
let -- Compare lists of modules
modulesAdded = allANotInBBy ((==) `on` fst) verB verA
modulesRemoved = allANotInBBy ((==) `on` fst) verA verB
modulesKept = intersectBy ((==) `on` fst) verA verB
-- Build result Diff of modules
resAdded = flip map modulesAdded $ \case
(mname, Just m ) ->
(MAdded . map (T.pack . prettyPrint) $ moduleExports m, mname)
(mname, Nothing) ->
(MAddedParseError, mname)
resRemoved = flip map modulesRemoved $ \case
(mname, Just m ) ->
(MRemoved . map (T.pack . prettyPrint) $ moduleExports m, mname)
(mname, Nothing) ->
(MRemovedParseError, mname)
-- TODO: This doesn't sort correctly by type of change + name
resKept = sortBy (compare `on` fst) . flip map modulesKept $ \(mname, modA') ->
-- Did the exports change?
case (modA', findModule verB mname) of
(_, Nothing) -> (MNotSureIfModifiedParseError, mname)
(Nothing, _) -> (MNotSureIfModifiedParseError, mname)
(Just modA, Just modB)
| moduleExports modA == moduleExports modB
-> (MUnmodifed , mname)
| otherwise -> (MModified expCmp, mname)
where
-- Which exports were added / removed?
expCmp =
[(EAdded , T.pack $ prettyPrint x) | x <- expAdded ] ++
[(ERemoved , T.pack $ prettyPrint x) | x <- expRemoved ] ++
[(EUnmodified, T.pack $ prettyPrint x) | x <- expUnmodified]
-- TODO: We do not look for type changes, no EModified
expAdded = allANotInBBy (==) (moduleExports modB)
(moduleExports modA)
expRemoved = allANotInBBy (==) (moduleExports modA)
(moduleExports modB)
expUnmodified = intersectBy (==) (moduleExports modA)
(moduleExports modB)
-- TODO: If the module does not have an export spec, we assume it exports nothing
moduleExports (Module _ (Just (ModuleHead _ _ _ (Just (ExportSpecList _ exportSpec)))) _ _ _ ) = exportSpec
moduleExports _ = []
findModule mlist mname = maybe Nothing snd $ find ((== mname) . fst) mlist
in resAdded ++ resRemoved ++ resKept