Commit 0e23379b authored by Albert Gräf's avatar Albert Gräf
Browse files

Rework the index creation process.

Pass 1 now iterates over the default hierarchy (either doc or the entire
libdir) and the help path (if enabled) to determine names and locations
of all help patches in the scope of the help browser (as set in the GUI
preferences). This initial pass is needed, in particular, to resolve
related object references in the meta data in pass 2.

Pass 2 then iterates over all index entries constructed in pass 1,
adding all the available meta data, including cross references to
related objects. When pass 2 finishes, index construction is complete
and the index cache is written to disk.

Note the new revision of the help browser narrows indexing to just the
-help.pd patches. Previous versions would index all .pd files. This may be
subject to review, but the new indexing scheme is faster and produces less
noise in the search results (i.e., you won't see any helper abstractions or
other patches which just happen to be bundled with the help patches).
parent 9309710e
Pipeline #3731 failed with stage
in 0 seconds
...@@ -13,14 +13,6 @@ exports.get_pwd = function() { ...@@ -13,14 +13,6 @@ exports.get_pwd = function() {
return pwd; return pwd;
} }
function funkify_windows_path(s) {
var ret = s;
if (process.platform === "win32") {
ret = ret.replace(/\//g, "\\");
}
return ret;
}
function defunkify_windows_path(s) { function defunkify_windows_path(s) {
var ret = s; var ret = s;
if (process.platform === "win32") { if (process.platform === "win32") {
...@@ -33,8 +25,6 @@ exports.set_pd_engine_id = function (id) { ...@@ -33,8 +25,6 @@ exports.set_pd_engine_id = function (id) {
pd_engine_id = id; pd_engine_id = id;
} }
exports.funkify_windows_path = funkify_windows_path;
exports.defunkify_windows_path = defunkify_windows_path; exports.defunkify_windows_path = defunkify_windows_path;
function gui_set_browser_config(doc_flag, path_flag, init_flag, helppath) { function gui_set_browser_config(doc_flag, path_flag, init_flag, helppath) {
...@@ -123,7 +113,6 @@ function init_elasticlunr() ...@@ -123,7 +113,6 @@ function init_elasticlunr()
index.addField("description"); index.addField("description");
index.addField("related_objects"); index.addField("related_objects");
index.addField("ref_related_objects"); index.addField("ref_related_objects");
index.addField("dir");
index.setRef("id"); index.setRef("id");
return index; return index;
} }
...@@ -132,12 +121,6 @@ var index = init_elasticlunr(); ...@@ -132,12 +121,6 @@ var index = init_elasticlunr();
var index_cache = new Array(); var index_cache = new Array();
var index_manif = new Set(); var index_manif = new Set();
function regex_dir(dir) {
let str_regex = funkify_windows_path(dir).replace(/\\/g, "\\\\").replace(/\//g, "\\\/")
+ "\\" + path.sep + "?([\\w]*)\\" + path.sep + "?([\\w|\\.|\\-|\\~]*)\\" + path.sep + "?([\\S]*)";
return (RegExp(str_regex.toString()));
}
function index_entry_esc(s) { function index_entry_esc(s) {
if (s) { if (s) {
var t = s.replace(/\\/g, "\\\\").replace(/:/g, "\\:"); var t = s.replace(/\\/g, "\\\\").replace(/:/g, "\\:");
...@@ -147,7 +130,8 @@ function index_entry_esc(s) { ...@@ -147,7 +130,8 @@ function index_entry_esc(s) {
} }
} }
// GB: Add related_objects, keywords and description of files in indexing // GB: This actually retrieves the meta data concerning related_objects,
// keywords, and description of help patches.
function add_doc_details_to_index(filename, data) { function add_doc_details_to_index(filename, data) {
var title = path.basename(filename, "-help.pd"), var title = path.basename(filename, "-help.pd"),
big_line = data.replace("\n", " "), big_line = data.replace("\n", " "),
...@@ -216,19 +200,19 @@ function add_doc_details_to_index(filename, data) { ...@@ -216,19 +200,19 @@ function add_doc_details_to_index(filename, data) {
rel_objs = rel_objs.found_objects; rel_objs = rel_objs.found_objects;
rel_objs = rel_objs ? rel_objs.toString().replace(/\,/g, " ") : null; rel_objs = rel_objs ? rel_objs.toString().replace(/\,/g, " ") : null;
// We use [\s\S] to match across multiple lines... // We use [\s\S] to match across multiple lines...
keywords = big_line keywords = big_line
.match(/#X text \-?[0-9]+ \-?[0-9]+ KEYWORDS ([\s\S]*?);/i); .match(/#X text \-?[0-9]+ \-?[0-9]+ KEYWORDS ([\s\S]*?);/i);
desc = big_line desc = big_line
.match(/#X text \-?[0-9]+ \-?[0-9]+ DESCRIPTION ([\s\S]*?);/i); .match(/#X text \-?[0-9]+ \-?[0-9]+ DESCRIPTION ([\s\S]*?);/i);
keywords = keywords && keywords.length > 1 ? keywords[1].trim() : null; keywords = keywords && keywords.length > 1 ? keywords[1].trim() : null;
desc = desc && desc.length > 1 ? desc[1].trim() : null; desc = desc && desc.length > 1 ? desc[1].trim() : null;
// Remove the Pd escapes for commas // Remove the Pd escapes for commas
desc = desc ? desc.replace(" \\,", ",") : null; desc = desc ? desc.replace(" \\,", ",") : null;
if (desc) { if (desc) {
// format Pd's "comma atoms" as normal commas // format Pd's "comma atoms" as normal commas
desc = desc.replace(" \\,", ","); desc = desc.replace(" \\,", ",");
} }
index_cache[index_cache.length] = [filename, title, keywords, desc, rel_objs, ref_rel_objs] index_cache[index_cache.length] = [filename, title, keywords, desc, rel_objs, ref_rel_objs]
.map(index_entry_esc).join(":"); .map(index_entry_esc).join(":");
...@@ -247,30 +231,16 @@ function add_doc_details_to_index(filename, data) { ...@@ -247,30 +231,16 @@ function add_doc_details_to_index(filename, data) {
}); });
} }
function make_cache(filename) { // GB: This does an initial scan of help patches, recording filename, title and
index_cache[index_cache.length] = [filename, path.basename(filename, "-help.pd"), null, null, null, null] // parent dir, without looking at the meta data.
.map(index_entry_esc).join(":"); function add_doc_to_index(err, filename, stat) {
var d = path.dirname(filename);
index_manif.add(d);
// Also add the parent directory to catch additions of siblings.
index_manif.add(path.dirname(d));
}
// GB: Index all the files in Purr Data folder considering its filename, title and parent dir
function add_doc_to_fast_index(err, filename, stat) {
if (!err) { if (!err) {
if (filename.slice(-8) === "-help.pd") { if (filename.slice(-8) === "-help.pd") {
try { try {
let title = path.basename(filename, "-help.pd"); let title = path.basename(filename, "-help.pd");
let regex_dir_compare = filename.match(regex_home_dir);
var dir = regex_dir_compare[1];
if (dir=="extra" && regex_dir_compare[3]) {
dir = dir + "\/" + regex_dir_compare[2];
}
index.addDoc({ index.addDoc({
"id": filename, "id": filename,
"title": title, "title": title
"dir": dir
}) })
} catch (read_err) { } catch (read_err) {
post("err: " + read_err); post("err: " + read_err);
...@@ -285,7 +255,6 @@ function add_doc_to_fast_index(err, filename, stat) { ...@@ -285,7 +255,6 @@ function add_doc_to_fast_index(err, filename, stat) {
var index_done = false; var index_done = false;
var index_started = false; var index_started = false;
var index_start_time; var index_start_time;
var regex_home_dir;
// Filenames for the index cache, relative to the user's homedir. // Filenames for the index cache, relative to the user's homedir.
const cache_basename = nw_os_is_windows const cache_basename = nw_os_is_windows
...@@ -367,21 +336,10 @@ function make_index() { ...@@ -367,21 +336,10 @@ function make_index() {
var doc_path = browser_doc?path.join(lib_dir, "doc"):lib_dir; var doc_path = browser_doc?path.join(lib_dir, "doc"):lib_dir;
var i = 0; var i = 0;
var l = help_path.length; var l = help_path.length;
function detail_files () { function detail_files() {
post("adding details to files in " + expand_tilde(doc_path));
let dir = expand_tilde(doc_path).match(regex_home_dir);
let all_indexed_files = Object.keys(index.documentStore.docs); let all_indexed_files = Object.keys(index.documentStore.docs);
let files_not_to_detail = all_indexed_files; var data;
var files_to_detail, data; all_indexed_files.forEach(function(filename,i,a) {
if (!dir) {
files_to_detail = all_indexed_files;
files_not_to_detail = null;
} else {
dir = (dir[1]=="extra" && dir[3])?(dir[1]+"\/"+dir[2]):dir[1];
files_to_detail = index.search(dir,{fields: {dir: {}}}).map(obj => obj.ref);
files_not_to_detail = files_not_to_detail.filter(doc => !files_to_detail.includes(doc));
}
files_to_detail.forEach(function(filename,i,a) {
// AG: We MUST read the files synchronously here. This might be a // AG: We MUST read the files synchronously here. This might be a
// performance issue on some systems, but if we don't do this then // performance issue on some systems, but if we don't do this then
// we may open a huge number of files simultaneously, causing the // we may open a huge number of files simultaneously, causing the
...@@ -393,26 +351,27 @@ function make_index() { ...@@ -393,26 +351,27 @@ function make_index() {
post("err: " + read_err); post("err: " + read_err);
} }
}); });
if (browser_path) make_index_cont();
if (files_not_to_detail) files_not_to_detail.forEach(file => make_cache(file));
finish_index(); finish_index();
} }
function make_index_cont() { function make_index_cont() {
if (i < l) { if (browser_path && i < l) {
var doc_path = help_path[i++]; var doc_path = help_path[i++];
// AG: These paths might not exist, ignore them in this case. Also // AG: These paths might not exist, ignore them in this case. Also
// note that we need to expand ~ here. // note that we need to expand ~ here.
var full_path = expand_tilde(doc_path); var full_path = expand_tilde(doc_path);
fs.lstat(full_path, function(err, stat) { fs.lstat(full_path, function(err, stat) {
if (!err) { if (!err) {
post("building help index in " + doc_path); post("scanning help patches in " + doc_path);
detail_files(); dive(full_path, add_doc_to_index, make_index_cont);
} else { } else {
make_index_cont(); make_index_cont();
} }
}); });
} else { } else {
// finish_index(); // reset the help path index, then invoke the main pass
i = 0;
post("building help index");
detail_files();
} }
} }
pdsend("pd gui-busy 1"); pdsend("pd gui-busy 1");
...@@ -459,9 +418,8 @@ function make_index() { ...@@ -459,9 +418,8 @@ function make_index() {
} else { } else {
// no index cache, or it is out of date, so (re)build it now, and // no index cache, or it is out of date, so (re)build it now, and
// save the new cache along the way // save the new cache along the way
regex_home_dir = regex_dir(lib_dir); post("scanning help patches in " + doc_path);
post("building help index in " + lib_dir); dive(doc_path, add_doc_to_index, make_index_cont);
dive(lib_dir, add_doc_to_fast_index, detail_files);
} }
pdsend("pd gui-busy 0"); pdsend("pd gui-busy 0");
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment