Initial Commit.

This commit is contained in:
Jeremy Rand 2013-02-01 23:24:36 -06:00
parent 9b1be2ce7f
commit d50e320cb1

59
yacy_indexer.user.js Normal file
View file

@ -0,0 +1,59 @@
// ==UserScript==
// @name YaCyIndexer
// @namespace https://veclabs.posterous.com/
// @description Indexes visited pages with YaCy.
// @version 0.1
// @match *://*/*
// @grant GM_xmlhttpRequest
// @grant GM_log
// ==/UserScript==
// User parameters:
var paramYaCyLocation = 'http://localhost:8090'; // Base URL of the YaCy peer that receives the crawl request.
var paramEnableQueryString = false; // Index pages with query strings (possible privacy leak).
var paramDepth = 1; // 0 = only the visited page; 1 = all links on visited page; higher values will index deeper but use exponentially more bandwidth.
var paramAgeNum = 7; // pages already indexed since this time won't be re-indexed.
var paramAgeUnit = 'day'; // units for above

/**
 * Returns the address of the page that should be handed to the crawler.
 *
 * The address is rebuilt from its parts instead of being taken from `href`,
 * so the fragment ("#...") is never part of the result, and the query string
 * ("?...") is only part of it when explicitly requested.
 *
 * @param {Object} loc - Location-like object; `protocol`, `host`, `pathname` and `search` are read.
 * @param {boolean} includeQuery - Whether to keep the query string.
 * @returns {string} The URL to crawl.
 */
function getCrawlingURL(loc, includeQuery) {
    var pageURL = [loc.protocol, '//', loc.host, loc.pathname].join('');
    return includeQuery ? pageURL + loc.search : pageURL;
}

/**
 * Builds the ordered list of arguments sent to YaCy's Crawler_p.html.
 *
 * YaCy Arguments -- Don't change these unless you've read the YaCy API docs.
 *
 * @param {string} pageURL - The URL to crawl (see getCrawlingURL).
 * @returns {Array} Array of [name, value] pairs, in the order they appear in the request.
 */
function buildCrawlArguments(pageURL) {
    return [
        ['crawlingstart', ''],
        ['crawlingMode', 'url'],
        ['crawlingURL', pageURL],
        ['bookmarkTitle', ''],
        ['crawlingDepth', paramDepth],
        ['directDocByURL', 'off'],
        ['crawlingDepthExtension', ''],
        ['range', 'wide'],
        ['mustmatch', '.*'],
        ['mustnotmatch', ''],
        ['ipMustmatch', '.*'],
        // Left empty on purpose. A candidate value that excludes private address ranges is
        // '(^127\\.)|(^10\\.)|(^172\\.1[6-9]\\.)|(^172\\.2[0-9]\\.)|(^172\\.3[0-1]\\.)|(^192\\.168\\.)'
        // (http://stackoverflow.com/questions/2814002/private-ip-address-identifier-in-regular-expression).
        ['ipMustnotmatch', ''],
        ['indexmustmatch', '.*'],
        ['indexmustnotmatch', ''],
        ['deleteold', 'off'],
        ['recrawl', 'reload'],
        ['reloadIfOlderNumber', paramAgeNum],
        ['reloadIfOlderUnit', paramAgeUnit],
        ['countryMustMatchSwitch', 'false'],
        ['crawlingDomMaxCheck', 'off'],
        ['crawlingQ', paramEnableQueryString ? 'on' : 'off'],
        ['storeHTCache', 'off'],
        ['cachePolicy', 'iffresh'],
        ['indexText', 'on'],
        ['indexMedia', 'on'],
        ['crawlOrder', 'off'],
        ['collection', 'user']
    ];
}

/**
 * Assembles the full Crawler_p.html request URL.
 *
 * Values are escaped with encodeURIComponent; names are emitted as given.
 *
 * @param {string} yacyLocation - Base URL of the YaCy peer, without a trailing slash.
 * @param {Array} crawlArguments - Array of [name, value] pairs.
 * @returns {string} The request URL.
 */
function buildCrawlerUrl(yacyLocation, crawlArguments) {
    var query = crawlArguments.map(function (pair) {
        return pair[0] + '=' + encodeURIComponent(pair[1]);
    }).join('&');
    return yacyLocation + '/Crawler_p.html?' + query;
}

/**
 * Writes a diagnostic message, preferring GM_log and falling back to the console.
 *
 * @param {string} message - Text to log.
 */
function logProblem(message) {
    if (typeof GM_log === 'function') {
        GM_log(message);
    } else if (typeof console !== 'undefined') {
        console.log(message);
    }
}

/**
 * Sends the crawl request to YaCy and reports failures instead of ignoring them.
 *
 * @param {string} yacyUrl - Request URL produced by buildCrawlerUrl.
 */
function requestCrawl(yacyUrl) {
    GM_xmlhttpRequest({
        method: 'GET',
        url: yacyUrl,
        onload: function (response) {
            // A reply arrived, but anything outside 2xx means the crawl was not accepted.
            if (response.status < 200 || response.status >= 300) {
                logProblem('YaCyIndexer: YaCy answered with HTTP status ' + response.status + '.');
            }
        },
        onerror: function () {
            logProblem('YaCyIndexer: could not reach YaCy at ' + paramYaCyLocation + '.');
        }
    });
}

/**
 * Entry point: asks YaCy to index the page this script is running on.
 */
function startIndexing() {
    // NOTE(review): script managers may also run this inside embedded frames, in which
    // case each frame issues its own request -- confirm whether @noframes is wanted.
    var pageURL = getCrawlingURL(window.location, paramEnableQueryString);
    requestCrawl(buildCrawlerUrl(paramYaCyLocation, buildCrawlArguments(pageURL)));
}

// Only start when a page context exists; the helpers above stay loadable without one.
if (typeof window !== 'undefined') {
    startIndexing();
}