forked from curl/everything-curl
-
Notifications
You must be signed in to change notification settings - Fork 0
/
mkindex.pl
executable file
·133 lines (110 loc) · 2.95 KB
/
mkindex.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
#!/usr/bin/perl
# Build an index of words in the file index-words that are found in the text.
# Words are compared case insensitively except for those starting with a dash
# (i.e. program option names). "Words" may actually be phrases consisting of
# more than one word separated by a space. The word as written in the index is
# as found in the file (i.e. using that case).
use feature "fc";
# Produce the lookup key for a keyword. A keyword that starts with a dash is
# taken to be an option name and is handed back untouched, so option lookups
# stay case-sensitive. Any other keyword is case-folded, which makes ordinary
# word lookups case-insensitive.
sub folded {
    my ($keyword) = @_;
    return ($keyword =~ /^-/) ? $keyword : fc($keyword);
}
# get all markdown files as arguments
my @files=@ARGV;

# Load the index words, one per line, from the file "index-words" in the
# current directory. Two globals are filled in:
#   %index  - maps the folded form of each word to its spelling in the file
#   @lwords - folded form of every word that contains a space or a period;
#             single() looks these up in the whole line rather than word by
#             word
# A lexical handle and three-argument open are used so the handle cannot
# clash with one used elsewhere, and the reason for a failure is reported.
my $wordfile = "index-words";
open(my $wordlist, "<", $wordfile)
    or die "no words: cannot open $wordfile: $!";
while(my $w = <$wordlist>) {
    chomp $w;
    my $key = folded($w);
    if($w =~ /[ .]/) {
        # word with spaces or periods
        push @lwords, $key;
    }
    $index{$key} = $w;
}
close($wordlist);
# Build the link to a section of a markdown file.
#
#   $fname   - file name, used unchanged as the part before the "#"
#   $section - heading text that the anchor is derived from
#
# Returns "$fname#anchor". The anchor is the heading text after the rewrites
# below, which are applied in the order shown.
sub urlify {
    my ($fname, $section) = @_;
    my $anchor = $section;
    for ($anchor) {
        tr/A-Z/a-z/;            # ASCII upper case => lower case
        s/^[-.]+//;             # remove leading dashes and dots
        s{[ /\@]}{-}g;          # space, slash and at-sign => dash
        s/[*`'":(),]+//g;       # remove rubbish
        s/^</less-than-/;       # leading "<" => "less-than-"
        s/>$/-greater-than/;    # trailing ">" => "-greater-than"
    }
    return "$fname#$anchor";
}
# Record one hit of index key $key in file $fname as a markdown link to the
# section titled $section at $url. Only the first hit of a key in each file
# is recorded: the global %word remembers the key/file pairs already seen and
# the global %all collects the comma-separated links for each key.
sub _record_hit {
    my ($key, $fname, $section, $url) = @_;
    return if $word{$key}{$fname};
    $word{$key}{$fname}++;
    $all{$key} .= ($all{$key}?", ":"")."[$section]($url)";
    return;
}

# Scan one markdown file and record, for every index word found in it, a link
# to the section in which the word first appears in that file.
#
#   $fname - path of the markdown file; also used as the link target
#
# Reads the globals %index and @lwords and updates %word and %all (through
# _record_hit). A file that cannot be opened is reported on stderr and
# skipped, so the remaining files are still indexed.
sub single {
    my ($fname)=@_;
    # Title and link of the most recent heading. Both are empty until the
    # first heading is seen, so an earlier hit is recorded as "[]()".
    my $section = "";
    my $url = "";

    # Three-argument open with a lexical handle: the file name is never
    # parsed for mode characters and the handle is private to this call.
    my $fh;
    if(!open($fh, "<", $fname)) {
        warn "cannot open $fname: $!\n";
        return;
    }
    while(my $line = <$fh>) {
        chomp $line;
        # Text the longer phrases are searched in: the whole line, or only
        # the title when the line is a heading.
        my $text = $line;
        if($line =~ /^#+ (.*)/) {
            $section = $1;
            $url = urlify($fname, $section);
            $text = $section; # use this too
        }

        # Single words: split the raw line on spaces and parentheses, then
        # strip punctuation before looking the word up.
        my @words = split(/[ \(\)]+/, $line);
        for my $w (@words) {
            $w =~ s/[,\.\`\'\]\[]//g;
            $w = folded($w);
            if($index{$w}) {
                _record_hit($w, $fname, $section, $url);
            }
        }

        # Check longer words. They are matched as literal substrings: an
        # index phrase is data, not a regular expression, so "." must only
        # match a period and characters such as "+" or "(" must not be able
        # to break the match. The folded text is computed once per line.
        my $folded_text = folded($text);
        foreach my $phrase (@lwords) {
            if(index($folded_text, $phrase) >= 0) {
                _record_hit($phrase, $fname, $section, $url);
            }
        }
    }
    close($fh);
    return;
}
# Scan every markdown file named on the command line, then open the output
# with the top-level heading of the index.
foreach my $markdown (@files) {
    single($markdown);
}

print "# Index\n\n";
# Compute the key a word is sorted and grouped under: the word in upper case
# with any leading dashes removed, so that options such as "--verbose" sort
# together with ordinary words.
sub sorting {
    my ($word) = @_;
    return uc($word) =~ s/^-+//r;
}
# Comparison routine for sort(): order primarily by the sorting() key
# (upper-cased, leading dashes removed) and, when two keys are equal, fall
# back to comparing the words exactly as written.
sub byname {
    return (sorting($a) cmp sorting($b)) || ($a cmp $b);
}
# Print the index. Entries come out in byname order; a "## X" heading is
# printed the first time an entry's sort key starts with the character X.
# Each entry shows the word as spelled in index-words followed by the links
# collected for it in %all.
my %letter;
foreach my $w (sort byname keys %all) {
    my $initial = substr(sorting($w), 0, 1);
    if(!$letter{$initial}) {
        $letter{$initial}++;
        print "## $initial\n";
    }
    # Plain print rather than printf: the index word is data, and a "%" in
    # it must not be interpreted as a format directive.
    print " - ".$index{$w}.": ".$all{$w}."\n";
}