From d50e320cb1b1ab564172ace961059346795769ff Mon Sep 17 00:00:00 2001 From: Jeremy Rand Date: Fri, 1 Feb 2013 23:24:36 -0600 Subject: [PATCH] Initial Commit. --- yacy_indexer.user.js | 59 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 yacy_indexer.user.js diff --git a/yacy_indexer.user.js b/yacy_indexer.user.js new file mode 100644 index 0000000..2866475 --- /dev/null +++ b/yacy_indexer.user.js @@ -0,0 +1,59 @@ +// ==UserScript== +// @name YaCyIndexer +// @namespace https://veclabs.posterous.com/ +// @description Indexes visited pages with YaCy. +// @version 0.1 +// @match *://*/* +// @grant GM_xmlhttpRequest +// @grant GM_log +// ==/UserScript== + +// User parameters: +var paramYaCyLocation = 'http://localhost:8090'; +var paramEnableQueryString = false; // Index pages with query strings (possible privacy leak). +var paramDepth = 1; // 0 = only the visited page; 1 = all links on visted page; higher values will index deeper but use exponentially more bandwidth. +var paramAgeNum = 7; // pages already indexed since this time won't be re-indexed. +var paramAgeUnit = 'day'; // units for above + +// YaCy Arguments -- Don't change these unless you've read the YaCy API docs. +var crawlingstart = ''; +var crawlingMode = 'url'; +var crawlingURL = paramEnableQueryString ? window.location.href : [location.protocol, '//', location.host, location.pathname].join(''); +var bookmarkTitle = ''; +var crawlingDepth = paramDepth; +var directDocByURL = 'off'; +var crawlingDepthExtension = ''; +var range = 'wide'; +var mustmatch = '.*'; +var mustnotmatch = ''; +var ipMustmatch = '.*'; +//var ipMustnotmatch = '(^127\.)|(^10\.)|(^172\.1[6-9]\.)|(^172\.2[0-9]\.)|(^172\.3[0-1]\.)|(^192\.168\.)'; // http://stackoverflow.com/questions/2814002/private-ip-address-identifier-in-regular-expression +var ipMustnotmatch = ''; +var indexmustmatch = '.*'; +var indexmustnotmatch = ''; +var deleteold = 'off'; +var recrawl = 'reload'; +var reloadIfOlderNumber = paramAgeNum; +var reloadIfOlderUnit = paramAgeUnit; +var countryMustMatchSwitch = 'false'; +var crawlingDomMaxCheck = 'off'; +var crawlingQ = paramEnableQueryString ? 'on' : 'off'; +var storeHTCache = 'off'; +var cachePolicy = 'iffresh'; +var indexText = 'on'; +var indexMedia = 'on'; +var crawlOrder = 'off'; +var collection = 'user'; + +var yacy_url = paramYaCyLocation + '/Crawler_p.html?crawlingstart=' + encodeURIComponent(crawlingstart) + '&crawlingMode=' + encodeURIComponent(crawlingMode) + '&crawlingURL=' + encodeURIComponent(crawlingURL) + '&bookmarkTitle=' + encodeURIComponent(bookmarkTitle) + '&crawlingDepth=' + encodeURIComponent(crawlingDepth) + '&directDocByURL=' + encodeURIComponent(directDocByURL) + '&crawlingDepthExtension=' + encodeURIComponent(crawlingDepthExtension) + '&range=' + encodeURIComponent(range) + '&mustmatch=' + encodeURIComponent(mustmatch) + '&mustnotmatch=' + encodeURIComponent(mustnotmatch) + '&ipMustmatch=' + encodeURIComponent(ipMustmatch) + '&ipMustnotmatch=' + encodeURIComponent(ipMustnotmatch) + '&indexmustmatch=' + encodeURIComponent(indexmustmatch) + '&indexmustnotmatch=' + encodeURIComponent(indexmustnotmatch) + '&deleteold=' + encodeURIComponent(deleteold) + '&recrawl=' + encodeURIComponent(recrawl) + '&reloadIfOlderNumber=' + encodeURIComponent(reloadIfOlderNumber) + '&reloadIfOlderUnit=' + encodeURIComponent(reloadIfOlderUnit) + '&countryMustMatchSwitch=' + encodeURIComponent(countryMustMatchSwitch) + '&crawlingDomMaxCheck=' + encodeURIComponent(crawlingDomMaxCheck) + '&crawlingQ=' + encodeURIComponent(crawlingQ) + '&storeHTCache=' + encodeURIComponent(storeHTCache) + '&cachePolicy=' + encodeURIComponent(cachePolicy) + '&indexText=' + encodeURIComponent(indexText) + '&indexMedia=' + encodeURIComponent(indexMedia) + '&crawlOrder=' + encodeURIComponent(crawlOrder) + '&collection=' + encodeURIComponent(collection); + +//GM_log(yacy_url); + +GM_xmlhttpRequest({ + method: "GET", + url: yacy_url, + onload: function(response) { + //GM_log("YaCy indexing should commence."); + } +}); +