gnucash/doc-tools/build-help-index.in
Dave Peticolas 1e3fd8b171 Build help index database using a combination of perl & C. Use
C to create and fill the database, to ensure the same db lib is
used to both create and read the db.


git-svn-id: svn+ssh://svn.gnucash.org/repo/gnucash/trunk@3338 57a11ea4-9604-0410-9ed3-97b8803252fd
2000-12-22 03:47:40 +00:00

63 lines
1.5 KiB
Perl

#!@-PERL-@ -w
# -*- perl -*-
use strict;

# Build the word index for the help files.
#
# Usage: build-help-index DBADD DB_FILE HELPFILE...
#
#   DBADD     program run to store the index; it is invoked as
#             DBADD DB_FILE word filelist [word filelist ...]
#   DB_FILE   database file name, passed through to DBADD untouched
#   HELPFILE  help files to scan for words
#
# Every indexed word is stored together with a newline-terminated list
# of the help files that contain it.

# Number of word/file-list pairs handed to DBADD per invocation.
my $PAIRS_PER_CALL = 32;

# Return the whole contents of $file as a single string.
# Dies if the file cannot be opened.
sub _slurp_file {
    my ($file) = @_;

    # Three-argument open with a lexical handle: the file name is never
    # parsed for a mode or pipe character, and the handle cannot clash
    # with a global bareword.
    open(my $fh, '<', $file)
        or die "Bad help file $file specified: $!\n";
    local $/;    # no record separator: read everything in one go
    my $data = <$fh>;
    close($fh);

    # An empty file yields undef; treat it as empty text.
    return defined($data) ? $data : '';
}

# Return the sorted list of distinct words worth indexing in $data:
# HTML tags are dropped, the text is lowercased, punctuation around
# words is stripped, and words shorter than three characters are
# discarded.
sub _index_words {
    my ($data) = @_;
    $data = '' unless defined $data;

    $data =~ s/<[^>]*>/ /gs;        # get rid of HTML tags
    $data =~ tr/\",();&<>!$*/ /;    # get rid of extra punct
    $data =~ tr/A-Z/a-z/;           # lowercase everything

    my %seen;
    foreach my $token (split(' ', $data)) {
        my $w = $token;

        # Trim punctuation per word rather than with a pattern over the
        # whole text, so words at the very start or end of the text and
        # words followed by a carriage return are cleaned as well.
        $w =~ s/^[.,'":;+|-]+//;    # get rid of initial punct
        $w =~ s/[.,'":;+|-]+$//;    # get rid of terminal punct

        # Filtering by length here (instead of matching " x " in the
        # text) also drops short words that directly follow another
        # short word, e.g. both words of "it is".
        next if length($w) < 3;     # skip empty, 1- and 2-letter words
        $seen{$w} = 1;
    }
    return sort keys %seen;
}

my $dbadd   = shift @ARGV;
my $db_file = shift @ARGV;

# Maps each word to the newline-terminated list of files containing it.
my %contents;
foreach my $file (@ARGV) {
    foreach my $w (_index_words(_slurp_file($file))) {
        $contents{$w} .= "$file\012";
    }
}

# Sorted so that the helper sees the words in a reproducible order.
my @keys = sort keys %contents;

# We don't pass all the keys at once in case there are limits on the
# size of argv. But do more than one at a time for efficiency.
while (my @batch = splice(@keys, 0, $PAIRS_PER_CALL)) {
    my @args;
    foreach my $w (@batch) {
        push(@args, $w, $contents{$w});
    }

    # List form of system: no shell is involved, so words and file
    # names need no quoting.
    my $status = system($dbadd, $db_file, @args);
    if ($status == -1) {
        die "Cannot run $dbadd: $!\n";
    }
    if ($status != 0) {
        die "$dbadd failed while updating $db_file (wait status $status).\n";
    }
}