mirror of
https://github.com/JeremyRand/YaCyIndexerGreasemonkey
synced 2024-11-23 21:55:45 +00:00
Initial Commit.
This commit is contained in:
parent
9b1be2ce7f
commit
d50e320cb1
1 changed files with 59 additions and 0 deletions
59
yacy_indexer.user.js
Normal file
59
yacy_indexer.user.js
Normal file
|
@ -0,0 +1,59 @@
|
||||||
|
// ==UserScript==
|
||||||
|
// @name YaCyIndexer
|
||||||
|
// @namespace https://veclabs.posterous.com/
|
||||||
|
// @description Indexes visited pages with YaCy.
|
||||||
|
// @version 0.1
|
||||||
|
// @match *://*/*
|
||||||
|
// @grant GM_xmlhttpRequest
|
||||||
|
// @grant GM_log
|
||||||
|
// ==/UserScript==
|
||||||
|
|
||||||
|
// User parameters: the only values meant to be edited by the person installing the script.
|
||||||
|
var paramYaCyLocation = 'http://localhost:8090'; // Base URL of the YaCy instance; '/Crawler_p.html?...' is appended to it, so no trailing slash.
|
||||||
|
var paramEnableQueryString = false; // true = index pages with their query strings (possible privacy leak); false = the query string is stripped from the visited URL before it is submitted.
|
||||||
|
var paramDepth = 1; // 0 = only the visited page; 1 = all links on the visited page; higher values will index deeper but use exponentially more bandwidth.
|
||||||
|
var paramAgeNum = 7; // Together with paramAgeUnit: pages already indexed within this period won't be re-indexed.
|
||||||
|
var paramAgeUnit = 'day'; // Unit for paramAgeNum.
|
||||||
|
|
||||||
|
// YaCy Arguments -- Don't change these unless you've read the YaCy API docs.
// Each variable below is sent, under its own name, as a query parameter of the Crawler_p.html request.
|
||||||
|
var crawlingstart = '';
|
||||||
|
var crawlingMode = 'url';
|
||||||
|
// URL submitted to YaCy: the full address when query strings are allowed,
// otherwise only protocol + host + path.
var crawlingURL;
if (paramEnableQueryString) {
    crawlingURL = window.location.href;
} else {
    crawlingURL = location.protocol + '//' + location.host + location.pathname;
}
|
||||||
|
// Remaining crawl arguments; each one is sent under its own name as a query parameter of the Crawler_p.html request.
var bookmarkTitle = '';
|
||||||
|
var crawlingDepth = paramDepth; // taken from the user parameter paramDepth
|
||||||
|
var directDocByURL = 'off';
|
||||||
|
var crawlingDepthExtension = '';
|
||||||
|
var range = 'wide';
|
||||||
|
var mustmatch = '.*';
|
||||||
|
var mustnotmatch = '';
|
||||||
|
var ipMustmatch = '.*';
|
||||||
|
// Disabled alternative for ipMustnotmatch: a pattern matching loopback and private IPv4 ranges (127.x, 10.x, 172.16-31.x, 192.168.x).
//var ipMustnotmatch = '(^127\.)|(^10\.)|(^172\.1[6-9]\.)|(^172\.2[0-9]\.)|(^172\.3[0-1]\.)|(^192\.168\.)'; // http://stackoverflow.com/questions/2814002/private-ip-address-identifier-in-regular-expression
|
||||||
|
var ipMustnotmatch = ''; // the value actually sent: an empty pattern
|
||||||
|
var indexmustmatch = '.*';
|
||||||
|
var indexmustnotmatch = '';
|
||||||
|
var deleteold = 'off';
|
||||||
|
var recrawl = 'reload';
|
||||||
|
var reloadIfOlderNumber = paramAgeNum; // taken from the user parameter paramAgeNum
|
||||||
|
var reloadIfOlderUnit = paramAgeUnit; // taken from the user parameter paramAgeUnit
|
||||||
|
var countryMustMatchSwitch = 'false';
|
||||||
|
var crawlingDomMaxCheck = 'off';
|
||||||
|
var crawlingQ = paramEnableQueryString ? 'on' : 'off'; // 'on' only when the user opted in to query strings
|
||||||
|
var storeHTCache = 'off';
|
||||||
|
var cachePolicy = 'iffresh';
|
||||||
|
var indexText = 'on';
|
||||||
|
var indexMedia = 'on';
|
||||||
|
var crawlOrder = 'off';
|
||||||
|
var collection = 'user';
|
||||||
|
|
||||||
|
// Build the Crawler_p.html request URL. Every YaCy argument is appended as
// name=value in the order listed, with the value run through
// encodeURIComponent and the pairs separated by '&'.
var yacy_url = (function () {
    var args = [
        ['crawlingstart', crawlingstart],
        ['crawlingMode', crawlingMode],
        ['crawlingURL', crawlingURL],
        ['bookmarkTitle', bookmarkTitle],
        ['crawlingDepth', crawlingDepth],
        ['directDocByURL', directDocByURL],
        ['crawlingDepthExtension', crawlingDepthExtension],
        ['range', range],
        ['mustmatch', mustmatch],
        ['mustnotmatch', mustnotmatch],
        ['ipMustmatch', ipMustmatch],
        ['ipMustnotmatch', ipMustnotmatch],
        ['indexmustmatch', indexmustmatch],
        ['indexmustnotmatch', indexmustnotmatch],
        ['deleteold', deleteold],
        ['recrawl', recrawl],
        ['reloadIfOlderNumber', reloadIfOlderNumber],
        ['reloadIfOlderUnit', reloadIfOlderUnit],
        ['countryMustMatchSwitch', countryMustMatchSwitch],
        ['crawlingDomMaxCheck', crawlingDomMaxCheck],
        ['crawlingQ', crawlingQ],
        ['storeHTCache', storeHTCache],
        ['cachePolicy', cachePolicy],
        ['indexText', indexText],
        ['indexMedia', indexMedia],
        ['crawlOrder', crawlOrder],
        ['collection', collection]
    ];
    var query = [];
    for (var i = 0; i < args.length; i++) {
        query.push(args[i][0] + '=' + encodeURIComponent(args[i][1]));
    }
    return paramYaCyLocation + '/Crawler_p.html?' + query.join('&');
}());
|
||||||
|
|
||||||
|
//GM_log(yacy_url);
|
||||||
|
|
||||||
|
// Fire the crawl request at the YaCy instance. The response body is not used;
// the handlers only report cases where the request did not succeed, so that an
// unreachable or rejecting YaCy instance does not fail silently.
GM_xmlhttpRequest({
    method: "GET",
    url: yacy_url,
    onload: function(response) {
        // A reply arrived, but a non-2xx status means the request was not accepted.
        if (response.status < 200 || response.status >= 300) {
            GM_log("YaCy indexing request was rejected (HTTP status " + response.status + ").");
        }
    },
    onerror: function(response) {
        // No usable reply at all, e.g. nothing is listening at paramYaCyLocation.
        GM_log("YaCy indexing request failed; could not reach " + paramYaCyLocation + ".");
    },
    ontimeout: function(response) {
        GM_log("YaCy indexing request to " + paramYaCyLocation + " timed out.");
    }
});
|
||||||
|
|
Loading…
Reference in a new issue