-
Notifications
You must be signed in to change notification settings - Fork 0
/
catuniq.kt
executable file
·41 lines (34 loc) · 1.36 KB
/
catuniq.kt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
//usr/bin/env [ $0 -nt $0.jar ] && kotlinc -d $0.jar $0; [ $0.jar -nt $0 ] && java -cp $CLASSPATH:$0.jar CatuniqKt $@; exit 0
import java.io.File
// Normalized keys (as produced by hash) of every line loaded from the seen files or already printed.
val seen: HashSet<String> = hashSetOf()
/**
 * Entry point. Usage: `catuniq searchfiles - seenfiles`
 *
 * Shows all the lines in the search files whose normalized form (see [hash]) is not
 * in the seen files and has not already been printed earlier in the same run.
 *
 * Arguments before the first `-` are search files; arguments after the last `-` are
 * seen files. When no `-` is given, every argument is a search file and nothing is
 * pre-loaded, so the run simply de-duplicates the search files.
 *
 * @param args file paths, optionally split into two groups by a lone `-`.
 */
fun main(args: Array<String>) {
    val searchFiles = args.takeWhile { it != "-" }
    // Without a separator, takeLastWhile would return every argument, which would mark
    // each search file's own lines as already seen and suppress all output.
    val seenFiles = if ("-" in args) args.takeLastWhile { it != "-" } else emptyList()

    seenFiles.forEach { path -> File(path).forEachLine { line -> seen.add(hash(line)) } }
    searchFiles.forEach { path -> catuniq(path) }
}
// Patterns used by hash(), compiled once instead of on every call.
private val OLD_JOURNAL_PREFIX = Regex("^.*│")
private val NEW_JOURNAL_PREFIX = Regex("^............. \\|. ")
private val TRAILING_TAGS = Regex(" [/+#=!>@:][^/+#=!>: ].*")
private val ATTRIBUTION = Regex(" --.*$")
private val NON_LETTERS = Regex("[^a-zA-Z]")

/**
 * Reduces a line to the key used for duplicate detection.
 *
 * Steps, applied in order:
 * 1. remove the old journal prefix: everything up to and including the last `│`;
 * 2. remove the new journal prefix: 13 arbitrary characters followed by ` |`, one
 *    character and a space;
 * 3. remove trailing tags: from the first space that is followed by one of
 *    `/ + # = ! > @ :` and then a non-marker, non-space character, to the end;
 * 4. remove attributions: from the first ` --` to the end;
 * 5. drop every character that is not an ASCII letter and lowercase the rest.
 *
 * @param str the raw line.
 * @return the normalized key; empty when the line contains no ASCII letters after stripping.
 */
fun hash(str: String): String =
    str.replace(OLD_JOURNAL_PREFIX, "")
        .replace(NEW_JOURNAL_PREFIX, "")
        .replace(TRAILING_TAGS, "")
        .replace(ATTRIBUTION, "")
        .replace(NON_LETTERS, "")
        .lowercase()
/**
 * Prints the lines of [filename] whose normalized key (see [hash]) has not been seen yet.
 *
 * A header containing the file name is printed first. Each printed line is prefixed
 * with the part of [filename] after the last `/`, a colon and a tab. The key of every
 * printed line is added to [seen], so later duplicates in this file or in files
 * processed afterwards are suppressed.
 *
 * @param filename path of the file to read.
 */
fun catuniq(filename: String) {
    println("\n======= $filename\n")
    // Loop-invariant: the label is the same for every line of the file.
    val label = filename.substringAfterLast("/")
    File(filename).forEachLine { line ->
        // add() returns true only when the key was absent, so one call both tests and records.
        if (seen.add(hash(line))) {
            println("$label:\t $line")
        }
    }
}
/**
 * Prints the lines of [filename] whose normalized key (see [hash]) is non-blank and
 * already present in [seen]. A header containing the file name is printed first;
 * [seen] itself is not modified.
 *
 * @param filename path of the file to read.
 */
fun catdup(filename: String) {
    println("\n======= $filename\n")
    File(filename).useLines { lines ->
        lines
            .filter { line ->
                val key = hash(line)
                key.isNotBlank() && key in seen
            }
            .forEach { line -> println(line) }
    }
}