A C D E F G H I L M N P Q R S T U V

A

acceptDocument(HttpDocument) - Method in interface com.norconex.collector.http.filter.IHttpDocumentFilter
Whether to accept an HTTP document.
acceptDocument(String, HttpMetadata) - Method in interface com.norconex.collector.http.filter.IHttpHeadersFilter
Whether to accept a URL based on its HTTP headers.
acceptDocument(HttpDocument) - Method in class com.norconex.collector.http.filter.impl.ExtensionURLFilter
 
acceptDocument(String, HttpMetadata) - Method in class com.norconex.collector.http.filter.impl.ExtensionURLFilter
 
acceptDocument(String, HttpMetadata) - Method in class com.norconex.collector.http.filter.impl.RegexHeaderFilter
 
acceptDocument(HttpDocument) - Method in class com.norconex.collector.http.filter.impl.RegexURLFilter
 
acceptDocument(String, HttpMetadata) - Method in class com.norconex.collector.http.filter.impl.RegexURLFilter
 
acceptURL(String) - Method in class com.norconex.collector.http.filter.impl.ExtensionURLFilter
 
acceptURL(String) - Method in class com.norconex.collector.http.filter.impl.RegexURLFilter
 
acceptURL(String) - Method in interface com.norconex.collector.http.filter.IURLFilter
Whether to accept this URL.
AUTH_METHOD_BASIC - Static variable in class com.norconex.collector.http.handler.impl.DefaultHttpClientInitializer
 
AUTH_METHOD_DIGEST - Static variable in class com.norconex.collector.http.handler.impl.DefaultHttpClientInitializer
 
AUTH_METHOD_FORM - Static variable in class com.norconex.collector.http.handler.impl.DefaultHttpClientInitializer
 
authenticateUsingForm(DefaultHttpClient) - Method in class com.norconex.collector.http.handler.impl.DefaultHttpClientInitializer
 

C

clone() - Method in class com.norconex.collector.http.crawler.HttpCrawlerConfig
 
COLLECTOR_PREFIX - Static variable in class com.norconex.collector.http.doc.HttpMetadata
 
com.norconex.collector.http - package com.norconex.collector.http
 
com.norconex.collector.http.crawler - package com.norconex.collector.http.crawler
 
com.norconex.collector.http.db - package com.norconex.collector.http.db
 
com.norconex.collector.http.db.impl - package com.norconex.collector.http.db.impl
 
com.norconex.collector.http.doc - package com.norconex.collector.http.doc
 
com.norconex.collector.http.filter - package com.norconex.collector.http.filter
 
com.norconex.collector.http.filter.impl - package com.norconex.collector.http.filter.impl
 
com.norconex.collector.http.handler - package com.norconex.collector.http.handler
 
com.norconex.collector.http.handler.impl - package com.norconex.collector.http.handler.impl
 
com.norconex.collector.http.robot - package com.norconex.collector.http.robot
 
com.norconex.collector.http.util - package com.norconex.collector.http.util
 
crawl(boolean) - Method in class com.norconex.collector.http.HttpCollector
Launches all crawlers defined in configuration.
crawlerFinished(HttpCrawler) - Method in class com.norconex.collector.http.crawler.HttpCrawlerEventAdapter
 
crawlerFinished(HttpCrawler) - Method in interface com.norconex.collector.http.crawler.IHttpCrawlerEventListener
 
crawlerStarted(HttpCrawler) - Method in class com.norconex.collector.http.crawler.HttpCrawlerEventAdapter
 
crawlerStarted(HttpCrawler) - Method in interface com.norconex.collector.http.crawler.IHttpCrawlerEventListener
 
CrawlStatus - Enum in com.norconex.collector.http.crawler
 
CrawlURL - Class in com.norconex.collector.http.crawler
 
CrawlURL() - Constructor for class com.norconex.collector.http.crawler.CrawlURL
 
CrawlURLDatabaseException - Exception in com.norconex.collector.http.db
 
CrawlURLDatabaseException() - Constructor for exception com.norconex.collector.http.db.CrawlURLDatabaseException
 
CrawlURLDatabaseException(String) - Constructor for exception com.norconex.collector.http.db.CrawlURLDatabaseException
 
CrawlURLDatabaseException(Throwable) - Constructor for exception com.norconex.collector.http.db.CrawlURLDatabaseException
 
CrawlURLDatabaseException(String, Throwable) - Constructor for exception com.norconex.collector.http.db.CrawlURLDatabaseException
 
createChecksum(HttpDocument) - Method in interface com.norconex.collector.http.handler.IHttpDocumentChecksummer
 
createChecksum(Properties) - Method in interface com.norconex.collector.http.handler.IHttpHeadersChecksummer
 
createChecksum(HttpDocument) - Method in class com.norconex.collector.http.handler.impl.DefaultHttpDocumentChecksummer
 
createChecksum(Properties) - Method in class com.norconex.collector.http.handler.impl.DefaultHttpHeadersChecksummer
 
createCrawlURLDatabase(HttpCrawlerConfig, boolean) - Method in interface com.norconex.collector.http.db.ICrawlURLDatabaseFactory
 
createCrawlURLDatabase(HttpCrawlerConfig, boolean) - Method in class com.norconex.collector.http.db.impl.DefaultCrawlURLDatabaseFactory
 
createJobContext() - Method in class com.norconex.collector.http.crawler.HttpCrawler
 
createJobSuite() - Method in class com.norconex.collector.http.HttpCollector
 

D

DEFAULT_DELAY - Static variable in class com.norconex.collector.http.handler.impl.DefaultDelayResolver
 
DEFAULT_FIELD - Static variable in class com.norconex.collector.http.handler.impl.DefaultHttpHeadersChecksummer
 
DEFAULT_LOGS_DIR - Static variable in class com.norconex.collector.http.HttpCollectorConfig
 
DEFAULT_MAX_URL_LENGTH - Static variable in class com.norconex.collector.http.handler.impl.DefaultURLExtractor
 
DEFAULT_PROGRESS_DIR - Static variable in class com.norconex.collector.http.HttpCollectorConfig
 
DEFAULT_VALID_STATUS_CODES - Static variable in class com.norconex.collector.http.handler.impl.SimpleHttpHeadersFetcher
 
DefaultCrawlURLDatabaseFactory - Class in com.norconex.collector.http.db.impl
Default database factory creating a DerbyCrawlURLDatabase instance.
DefaultCrawlURLDatabaseFactory() - Constructor for class com.norconex.collector.http.db.impl.DefaultCrawlURLDatabaseFactory
 
DefaultDelayResolver - Class in com.norconex.collector.http.handler.impl
 
DefaultDelayResolver() - Constructor for class com.norconex.collector.http.handler.impl.DefaultDelayResolver
 
DefaultDelayResolver.DelaySchedule - Class in com.norconex.collector.http.handler.impl
 
DefaultDelayResolver.DelaySchedule(String, String, String, long) - Constructor for class com.norconex.collector.http.handler.impl.DefaultDelayResolver.DelaySchedule
 
DefaultDocumentFetcher - Class in com.norconex.collector.http.handler.impl
Default implementation of IHttpDocumentFetcher.
DefaultDocumentFetcher() - Constructor for class com.norconex.collector.http.handler.impl.DefaultDocumentFetcher
 
DefaultDocumentFetcher(int[]) - Constructor for class com.norconex.collector.http.handler.impl.DefaultDocumentFetcher
 
DefaultHttpClientInitializer - Class in com.norconex.collector.http.handler.impl
Default implementation of IHttpClientInitializer.
DefaultHttpClientInitializer() - Constructor for class com.norconex.collector.http.handler.impl.DefaultHttpClientInitializer
 
DefaultHttpDocumentChecksummer - Class in com.norconex.collector.http.handler.impl
Default implementation of IHttpDocumentChecksummer which returns an MD5 checksum value of the extracted document content unless a given field is specified.
DefaultHttpDocumentChecksummer() - Constructor for class com.norconex.collector.http.handler.impl.DefaultHttpDocumentChecksummer
 
DefaultHttpHeadersChecksummer - Class in com.norconex.collector.http.handler.impl
Default implementation of IHttpHeadersChecksummer which simply returns the exact value of the "Last-Modified" HTTP header if no alternate header is specified.
DefaultHttpHeadersChecksummer() - Constructor for class com.norconex.collector.http.handler.impl.DefaultHttpHeadersChecksummer
 
DefaultRobotsTxtProvider - Class in com.norconex.collector.http.handler.impl
Default implementation of IRobotsTxtProvider.
DefaultRobotsTxtProvider() - Constructor for class com.norconex.collector.http.handler.impl.DefaultRobotsTxtProvider
 
DefaultURLExtractor - Class in com.norconex.collector.http.handler.impl
Default implementation of IURLExtractor.
DefaultURLExtractor() - Constructor for class com.norconex.collector.http.handler.impl.DefaultURLExtractor
 
delay(RobotsTxt, String) - Method in interface com.norconex.collector.http.handler.IDelayResolver
 
delay(RobotsTxt, String) - Method in class com.norconex.collector.http.handler.impl.DefaultDelayResolver
 
DerbyCrawlURLDatabase - Class in com.norconex.collector.http.db.impl
 
DerbyCrawlURLDatabase(HttpCrawlerConfig, boolean) - Constructor for class com.norconex.collector.http.db.impl.DerbyCrawlURLDatabase
 
DOC_CHARSET - Static variable in class com.norconex.collector.http.doc.HttpMetadata
 
DOC_MIMETYPE - Static variable in class com.norconex.collector.http.doc.HttpMetadata
 
DOC_URL - Static variable in class com.norconex.collector.http.doc.HttpMetadata
 
documentCrawled(HttpCrawler, HttpDocument) - Method in class com.norconex.collector.http.crawler.HttpCrawlerEventAdapter
 
documentCrawled(HttpCrawler, HttpDocument) - Method in interface com.norconex.collector.http.crawler.IHttpCrawlerEventListener
 
documentFetched(HttpCrawler, HttpDocument, IHttpDocumentFetcher) - Method in class com.norconex.collector.http.crawler.HttpCrawlerEventAdapter
 
documentFetched(HttpCrawler, HttpDocument, IHttpDocumentFetcher) - Method in interface com.norconex.collector.http.crawler.IHttpCrawlerEventListener
 
documentHeadersFetched(HttpCrawler, String, IHttpHeadersFetcher, Properties) - Method in class com.norconex.collector.http.crawler.HttpCrawlerEventAdapter
 
documentHeadersFetched(HttpCrawler, String, IHttpHeadersFetcher, Properties) - Method in interface com.norconex.collector.http.crawler.IHttpCrawlerEventListener
 
documentHeadersRejected(HttpCrawler, String, IHttpHeadersFilter, Properties) - Method in class com.norconex.collector.http.crawler.HttpCrawlerEventAdapter
 
documentHeadersRejected(HttpCrawler, String, IHttpHeadersFilter, Properties) - Method in interface com.norconex.collector.http.crawler.IHttpCrawlerEventListener
 
documentImported(HttpCrawler, HttpDocument) - Method in class com.norconex.collector.http.crawler.HttpCrawlerEventAdapter
 
documentImported(HttpCrawler, HttpDocument) - Method in interface com.norconex.collector.http.crawler.IHttpCrawlerEventListener
 
documentPostProcessed(HttpCrawler, HttpDocument, IHttpDocumentProcessor) - Method in class com.norconex.collector.http.crawler.HttpCrawlerEventAdapter
 
documentPostProcessed(HttpCrawler, HttpDocument, IHttpDocumentProcessor) - Method in interface com.norconex.collector.http.crawler.IHttpCrawlerEventListener
 
documentPreProcessed(HttpCrawler, HttpDocument, IHttpDocumentProcessor) - Method in class com.norconex.collector.http.crawler.HttpCrawlerEventAdapter
 
documentPreProcessed(HttpCrawler, HttpDocument, IHttpDocumentProcessor) - Method in interface com.norconex.collector.http.crawler.IHttpCrawlerEventListener
 
documentRejected(HttpCrawler, HttpDocument, IHttpDocumentFilter) - Method in class com.norconex.collector.http.crawler.HttpCrawlerEventAdapter
 
documentRejected(HttpCrawler, HttpDocument, IHttpDocumentFilter) - Method in interface com.norconex.collector.http.crawler.IHttpCrawlerEventListener
 
documentRobotsTxtRejected(HttpCrawler, String, IURLFilter, RobotsTxt) - Method in class com.norconex.collector.http.crawler.HttpCrawlerEventAdapter
 
documentRobotsTxtRejected(HttpCrawler, String, IURLFilter, RobotsTxt) - Method in interface com.norconex.collector.http.crawler.IHttpCrawlerEventListener
 
documentURLRejected(HttpCrawler, String, IURLFilter) - Method in class com.norconex.collector.http.crawler.HttpCrawlerEventAdapter
 
documentURLRejected(HttpCrawler, String, IURLFilter) - Method in interface com.norconex.collector.http.crawler.IHttpCrawlerEventListener
 
documentURLsExtracted(HttpCrawler, HttpDocument) - Method in class com.norconex.collector.http.crawler.HttpCrawlerEventAdapter
 
documentURLsExtracted(HttpCrawler, HttpDocument) - Method in interface com.norconex.collector.http.crawler.IHttpCrawlerEventListener
 

E

equals(Object) - Method in class com.norconex.collector.http.crawler.CrawlURL
 
equals(Object) - Method in class com.norconex.collector.http.filter.impl.ExtensionURLFilter
 
equals(Object) - Method in class com.norconex.collector.http.filter.impl.RegexHeaderFilter
 
equals(Object) - Method in class com.norconex.collector.http.filter.impl.RegexURLFilter
 
equals(Object) - Method in class com.norconex.collector.http.handler.impl.DefaultDelayResolver.DelaySchedule
 
equals(Object) - Method in class com.norconex.collector.http.handler.impl.GenericURLNormalizer
 
equals(Object) - Method in class com.norconex.collector.http.handler.impl.GenericURLNormalizer.Replace
 
ExtensionURLFilter - Class in com.norconex.collector.http.filter.impl
Filters URLs based on a comma-separated list of file extensions.
ExtensionURLFilter() - Constructor for class com.norconex.collector.http.filter.impl.ExtensionURLFilter
 
ExtensionURLFilter(String) - Constructor for class com.norconex.collector.http.filter.impl.ExtensionURLFilter
 
ExtensionURLFilter(String, OnMatch) - Constructor for class com.norconex.collector.http.filter.impl.ExtensionURLFilter
 
ExtensionURLFilter(String, OnMatch, boolean) - Constructor for class com.norconex.collector.http.filter.impl.ExtensionURLFilter
 
extractURLs(Reader, String, ContentType) - Method in class com.norconex.collector.http.handler.impl.DefaultURLExtractor
 
extractURLs(Reader, String, ContentType) - Method in interface com.norconex.collector.http.handler.IURLExtractor
Extracts URLs out of a document.

F

fetchDocument(DefaultHttpClient, HttpDocument) - Method in interface com.norconex.collector.http.handler.IHttpDocumentFetcher
Fetches an HTTP document and saves it to a local file.
fetchDocument(DefaultHttpClient, HttpDocument) - Method in class com.norconex.collector.http.handler.impl.DefaultDocumentFetcher
 
fetchHTTPHeaders(DefaultHttpClient, String) - Method in interface com.norconex.collector.http.handler.IHttpHeadersFetcher
Returning null means the headers could not be fetched and the associated document will be skipped (treated as rejected).
fetchHTTPHeaders(DefaultHttpClient, String) - Method in class com.norconex.collector.http.handler.impl.SimpleHttpHeadersFetcher
 

G

GenericURLNormalizer - Class in com.norconex.collector.http.handler.impl
Generic implementation of IURLNormalizer that should satisfy most URL normalization needs.
GenericURLNormalizer() - Constructor for class com.norconex.collector.http.handler.impl.GenericURLNormalizer
 
GenericURLNormalizer.Normalization - Enum in com.norconex.collector.http.handler.impl
 
GenericURLNormalizer.Replace - Class in com.norconex.collector.http.handler.impl
 
GenericURLNormalizer.Replace(String) - Constructor for class com.norconex.collector.http.handler.impl.GenericURLNormalizer.Replace
 
GenericURLNormalizer.Replace(String, String) - Constructor for class com.norconex.collector.http.handler.impl.GenericURLNormalizer.Replace
 
getActiveCount() - Method in interface com.norconex.collector.http.db.ICrawlURLDatabase
Gets the number of active URLs (currently being processed).
getActiveCount() - Method in class com.norconex.collector.http.db.impl.DerbyCrawlURLDatabase
 
getAuthMethod() - Method in class com.norconex.collector.http.handler.impl.DefaultHttpClientInitializer
 
getAuthPassword() - Method in class com.norconex.collector.http.handler.impl.DefaultHttpClientInitializer
 
getAuthPasswordField() - Method in class com.norconex.collector.http.handler.impl.DefaultHttpClientInitializer
 
getAuthURL() - Method in class com.norconex.collector.http.handler.impl.DefaultHttpClientInitializer
 
getAuthUsername() - Method in class com.norconex.collector.http.handler.impl.DefaultHttpClientInitializer
 
getAuthUsernameField() - Method in class com.norconex.collector.http.handler.impl.DefaultHttpClientInitializer
 
getCached(String) - Method in interface com.norconex.collector.http.db.ICrawlURLDatabase
Gets the cached URL from previous time crawler was run (e.g.
getCached(String) - Method in class com.norconex.collector.http.db.impl.DerbyCrawlURLDatabase
 
getCommitter() - Method in class com.norconex.collector.http.crawler.HttpCrawlerConfig
 
getConfigurationFile() - Method in class com.norconex.collector.http.HttpCollector
 
getContentType() - Method in class com.norconex.collector.http.doc.HttpMetadata
 
getCrawlDelay() - Method in class com.norconex.collector.http.robot.RobotsTxt
 
getCrawlerConfigs() - Method in class com.norconex.collector.http.HttpCollectorConfig
 
getCrawlerListeners() - Method in class com.norconex.collector.http.crawler.HttpCrawlerConfig
 
getCrawlers() - Method in class com.norconex.collector.http.HttpCollector
 
getCrawlURLDatabaseFactory() - Method in class com.norconex.collector.http.crawler.HttpCrawlerConfig
 
getDayOfMonthRange() - Method in class com.norconex.collector.http.handler.impl.DefaultDelayResolver.DelaySchedule
 
getDayOfWeekRange() - Method in class com.norconex.collector.http.handler.impl.DefaultDelayResolver.DelaySchedule
 
getDefaultDelay() - Method in class com.norconex.collector.http.handler.impl.DefaultDelayResolver
Gets the default delay in milliseconds.
getDelay() - Method in class com.norconex.collector.http.handler.impl.DefaultDelayResolver.DelaySchedule
 
getDelayResolver() - Method in class com.norconex.collector.http.crawler.HttpCrawlerConfig
 
getDepth() - Method in class com.norconex.collector.http.crawler.CrawlURL
 
getDocChecksum() - Method in class com.norconex.collector.http.crawler.CrawlURL
 
getDocumentUrl() - Method in class com.norconex.collector.http.doc.HttpMetadata
 
getDocumentUrls() - Method in class com.norconex.collector.http.doc.HttpMetadata
 
getExtensions() - Method in class com.norconex.collector.http.filter.impl.ExtensionURLFilter
 
getField() - Method in class com.norconex.collector.http.handler.impl.DefaultHttpDocumentChecksummer
 
getField() - Method in class com.norconex.collector.http.handler.impl.DefaultHttpHeadersChecksummer
 
getFilters() - Method in class com.norconex.collector.http.robot.RobotsTxt
 
getHeadChecksum() - Method in class com.norconex.collector.http.crawler.CrawlURL
 
getHeader() - Method in class com.norconex.collector.http.filter.impl.RegexHeaderFilter
 
getHeadersPrefix() - Method in class com.norconex.collector.http.handler.impl.DefaultDocumentFetcher
 
getHeadersPrefix() - Method in class com.norconex.collector.http.handler.impl.SimpleHttpHeadersFetcher
 
getHttpClientInitializer() - Method in class com.norconex.collector.http.crawler.HttpCrawlerConfig
 
getHttpDocumentChecksummer() - Method in class com.norconex.collector.http.crawler.HttpCrawlerConfig
 
getHttpDocumentFetcher() - Method in class com.norconex.collector.http.crawler.HttpCrawlerConfig
 
getHttpDocumentfilters() - Method in class com.norconex.collector.http.crawler.HttpCrawlerConfig
 
getHttpHeadersChecksummer() - Method in class com.norconex.collector.http.crawler.HttpCrawlerConfig
 
getHttpHeadersFetcher() - Method in class com.norconex.collector.http.crawler.HttpCrawlerConfig
 
getHttpHeadersFilters() - Method in class com.norconex.collector.http.crawler.HttpCrawlerConfig
 
getId() - Method in class com.norconex.collector.http.crawler.HttpCrawler
 
getId() - Method in class com.norconex.collector.http.crawler.HttpCrawlerConfig
 
getId() - Method in class com.norconex.collector.http.HttpCollectorConfig
 
getImporterConfig() - Method in class com.norconex.collector.http.crawler.HttpCrawlerConfig
 
getLocalFile() - Method in class com.norconex.collector.http.doc.HttpDocument
 
getLogsDir() - Method in class com.norconex.collector.http.HttpCollectorConfig
 
getMatch() - Method in class com.norconex.collector.http.handler.impl.GenericURLNormalizer.Replace
 
getMaxDepth() - Method in class com.norconex.collector.http.crawler.HttpCrawlerConfig
 
getMaxURLLength() - Method in class com.norconex.collector.http.handler.impl.DefaultURLExtractor
 
getMaxURLs() - Method in class com.norconex.collector.http.crawler.HttpCrawlerConfig
 
getMetadata() - Method in class com.norconex.collector.http.doc.HttpDocument
 
getNormalizations() - Method in class com.norconex.collector.http.handler.impl.GenericURLNormalizer
 
getNumThreads() - Method in class com.norconex.collector.http.crawler.HttpCrawlerConfig
 
getPostImportProcessors() - Method in class com.norconex.collector.http.crawler.HttpCrawlerConfig
 
getPreImportProcessors() - Method in class com.norconex.collector.http.crawler.HttpCrawlerConfig
 
getProcessedCount() - Method in interface com.norconex.collector.http.db.ICrawlURLDatabase
Gets the number of URLs processed.
getProcessedCount() - Method in class com.norconex.collector.http.db.impl.DerbyCrawlURLDatabase
 
getProgressDir() - Method in class com.norconex.collector.http.HttpCollectorConfig
 
getProxyHost() - Method in class com.norconex.collector.http.handler.impl.DefaultHttpClientInitializer
 
getProxyPassword() - Method in class com.norconex.collector.http.handler.impl.DefaultHttpClientInitializer
 
getProxyPort() - Method in class com.norconex.collector.http.handler.impl.DefaultHttpClientInitializer
 
getProxyRealm() - Method in class com.norconex.collector.http.handler.impl.DefaultHttpClientInitializer
 
getProxyUsername() - Method in class com.norconex.collector.http.handler.impl.DefaultHttpClientInitializer
 
getQueueSize() - Method in interface com.norconex.collector.http.db.ICrawlURLDatabase
Gets the size of the URL queue (number of URLs left to process).
getQueueSize() - Method in class com.norconex.collector.http.db.impl.DerbyCrawlURLDatabase
 
getRegex() - Method in class com.norconex.collector.http.filter.impl.RegexHeaderFilter
 
getRegex() - Method in class com.norconex.collector.http.filter.impl.RegexURLFilter
 
getReplacement() - Method in class com.norconex.collector.http.handler.impl.GenericURLNormalizer.Replace
 
getReplaces() - Method in class com.norconex.collector.http.handler.impl.GenericURLNormalizer
 
getRobotsTxt(DefaultHttpClient, String) - Method in class com.norconex.collector.http.handler.impl.DefaultRobotsTxtProvider
 
getRobotsTxt(DefaultHttpClient, String) - Method in interface com.norconex.collector.http.handler.IRobotsTxtProvider
 
getRobotsTxtProvider() - Method in class com.norconex.collector.http.crawler.HttpCrawlerConfig
 
getSchedules() - Method in class com.norconex.collector.http.handler.impl.DefaultDelayResolver
 
getStartURLs() - Method in class com.norconex.collector.http.crawler.HttpCrawlerConfig
 
getStatus() - Method in class com.norconex.collector.http.crawler.CrawlURL
 
getTimeRange() - Method in class com.norconex.collector.http.handler.impl.DefaultDelayResolver.DelaySchedule
 
getUrl() - Method in class com.norconex.collector.http.crawler.CrawlURL
 
getUrl() - Method in class com.norconex.collector.http.doc.HttpDocument
 
getUrlExtractor() - Method in class com.norconex.collector.http.crawler.HttpCrawlerConfig
 
getURLFilters() - Method in class com.norconex.collector.http.crawler.HttpCrawlerConfig
 
getUrlNormalizer() - Method in class com.norconex.collector.http.crawler.HttpCrawlerConfig
 
getUserAgent() - Method in class com.norconex.collector.http.handler.impl.DefaultHttpClientInitializer
 
getValidStatusCodes() - Method in class com.norconex.collector.http.handler.impl.DefaultDocumentFetcher
 
getValidStatusCodes() - Method in class com.norconex.collector.http.handler.impl.SimpleHttpHeadersFetcher
 
getVariablesFile() - Method in class com.norconex.collector.http.HttpCollector
 
getWorkDir() - Method in class com.norconex.collector.http.crawler.HttpCrawlerConfig
 

H

hashCode() - Method in class com.norconex.collector.http.crawler.CrawlURL
 
hashCode() - Method in class com.norconex.collector.http.filter.impl.ExtensionURLFilter
 
hashCode() - Method in class com.norconex.collector.http.filter.impl.RegexHeaderFilter
 
hashCode() - Method in class com.norconex.collector.http.filter.impl.RegexURLFilter
 
hashCode() - Method in class com.norconex.collector.http.handler.impl.DefaultDelayResolver.DelaySchedule
 
hashCode() - Method in class com.norconex.collector.http.handler.impl.GenericURLNormalizer
 
hashCode() - Method in class com.norconex.collector.http.handler.impl.GenericURLNormalizer.Replace
 
HTTP_CONTENT_LENGTH - Static variable in class com.norconex.collector.http.doc.HttpMetadata
 
HTTP_CONTENT_TYPE - Static variable in class com.norconex.collector.http.doc.HttpMetadata
 
HttpCollector - Class in com.norconex.collector.http
Main application class.
HttpCollector() - Constructor for class com.norconex.collector.http.HttpCollector
Creates a non-configured HTTP collector.
HttpCollector(File, File) - Constructor for class com.norconex.collector.http.HttpCollector
Creates an HTTP Collector configured using the provided configuration file and variables file.
HttpCollector(HttpCollectorConfig) - Constructor for class com.norconex.collector.http.HttpCollector
Creates and configures an HTTP Collector with the provided configuration.
HttpCollectorConfig - Class in com.norconex.collector.http
HTTP Collector configuration.
HttpCollectorConfig(String) - Constructor for class com.norconex.collector.http.HttpCollectorConfig
Creates a new collector configuration with the given unique id.
HttpCollectorConfigLoader - Class in com.norconex.collector.http
HTTP Collector configuration loader.
HttpCollectorException - Exception in com.norconex.collector.http
Runtime exception for most unrecoverable issues thrown by HTTP Collector classes.
HttpCollectorException() - Constructor for exception com.norconex.collector.http.HttpCollectorException
 
HttpCollectorException(String) - Constructor for exception com.norconex.collector.http.HttpCollectorException
 
HttpCollectorException(Throwable) - Constructor for exception com.norconex.collector.http.HttpCollectorException
 
HttpCollectorException(String, Throwable) - Constructor for exception com.norconex.collector.http.HttpCollectorException
 
HttpCrawler - Class in com.norconex.collector.http.crawler
 
HttpCrawler(HttpCrawlerConfig) - Constructor for class com.norconex.collector.http.crawler.HttpCrawler
 
HttpCrawlerConfig - Class in com.norconex.collector.http.crawler
 
HttpCrawlerConfig() - Constructor for class com.norconex.collector.http.crawler.HttpCrawlerConfig
 
HttpCrawlerConfigLoader - Class in com.norconex.collector.http.crawler
 
HttpCrawlerEventAdapter - Class in com.norconex.collector.http.crawler
Adapter for IHttpCrawlerEventListener.
HttpCrawlerEventAdapter() - Constructor for class com.norconex.collector.http.crawler.HttpCrawlerEventAdapter
 
HttpDocument - Class in com.norconex.collector.http.doc
 
HttpDocument(String, File) - Constructor for class com.norconex.collector.http.doc.HttpDocument
 
HttpMetadata - Class in com.norconex.collector.http.doc
 
HttpMetadata(String) - Constructor for class com.norconex.collector.http.doc.HttpMetadata
 

I

ICrawlURLDatabase - Interface in com.norconex.collector.http.db
Database implementation holding necessary information about all URL crawling activities, including which crawling stage each URL is in.
ICrawlURLDatabaseFactory - Interface in com.norconex.collector.http.db
 
IDelayResolver - Interface in com.norconex.collector.http.handler
Resolves and creates "delays" between each document crawled.
IHttpClientInitializer - Interface in com.norconex.collector.http.handler
Initializes an HTTP Connection.
IHttpCrawlerEventListener - Interface in com.norconex.collector.http.crawler
Allows implementers to react to any crawler-specific events.
IHttpDocumentChecksummer - Interface in com.norconex.collector.http.handler
 
IHttpDocumentFetcher - Interface in com.norconex.collector.http.handler
Fetches the HTTP document and its metadata (HTTP Headers).
IHttpDocumentFilter - Interface in com.norconex.collector.http.filter
Filter a document after the document content is downloaded.
IHttpDocumentProcessor - Interface in com.norconex.collector.http.handler
Custom processing (optional) performed on a document.
IHttpHeadersChecksummer - Interface in com.norconex.collector.http.handler
 
IHttpHeadersFetcher - Interface in com.norconex.collector.http.handler
Fetches the HTTP Header, typically via a HEAD request.
IHttpHeadersFilter - Interface in com.norconex.collector.http.filter
Filter a document based on its HTTP headers, before the document content is downloaded.
initializeHTTPClient(DefaultHttpClient) - Method in interface com.norconex.collector.http.handler.IHttpClientInitializer
 
initializeHTTPClient(DefaultHttpClient) - Method in class com.norconex.collector.http.handler.impl.DefaultHttpClientInitializer
 
IRobotsMetaBuilder - Interface in com.norconex.collector.http.handler
 
IRobotsTxtProvider - Interface in com.norconex.collector.http.handler
Given a URL, extract any "robots.txt" rules.
isActive(String) - Method in interface com.norconex.collector.http.db.ICrawlURLDatabase
Whether the given URL is currently being processed (i.e.
isActive(String) - Method in class com.norconex.collector.http.db.impl.DerbyCrawlURLDatabase
 
isAllowedFromMetadata(Properties) - Method in interface com.norconex.collector.http.handler.IRobotsMetaBuilder
 
isCacheEmpty() - Method in interface com.norconex.collector.http.db.ICrawlURLDatabase
Whether there are any URLs in the cache from a previous crawler run.
isCacheEmpty() - Method in class com.norconex.collector.http.db.impl.DerbyCrawlURLDatabase
 
isCaseSensitive() - Method in class com.norconex.collector.http.filter.impl.ExtensionURLFilter
 
isCaseSensitive() - Method in class com.norconex.collector.http.filter.impl.RegexHeaderFilter
 
isCaseSensitive() - Method in class com.norconex.collector.http.filter.impl.RegexURLFilter
 
isCookiesDisabled() - Method in class com.norconex.collector.http.handler.impl.DefaultHttpClientInitializer
 
isCurrentTimeInSchedule() - Method in class com.norconex.collector.http.handler.impl.DefaultDelayResolver.DelaySchedule
 
isDeleteOrphans() - Method in class com.norconex.collector.http.crawler.HttpCrawlerConfig
 
isIgnoreRobotsCrawlDelay() - Method in class com.norconex.collector.http.handler.impl.DefaultDelayResolver
 
isIgnoreRobotsTxt() - Method in class com.norconex.collector.http.crawler.HttpCrawlerConfig
 
isKeepDownloads() - Method in class com.norconex.collector.http.crawler.HttpCrawlerConfig
 
isProcessed(String) - Method in interface com.norconex.collector.http.db.ICrawlURLDatabase
Whether the given URL has been processed.
isProcessed(String) - Method in class com.norconex.collector.http.db.impl.DerbyCrawlURLDatabase
 
isQueued(String) - Method in interface com.norconex.collector.http.db.ICrawlURLDatabase
Whether the given URL is in the queue or not (waiting to be processed).
isQueued(String) - Method in class com.norconex.collector.http.db.impl.DerbyCrawlURLDatabase
 
isQueueEmpty() - Method in interface com.norconex.collector.http.db.ICrawlURLDatabase
Whether there are any URLs to process in the queue.
isQueueEmpty() - Method in class com.norconex.collector.http.db.impl.DerbyCrawlURLDatabase
 
isStopped() - Method in class com.norconex.collector.http.crawler.HttpCrawler
 
isVanished(CrawlURL) - Method in interface com.norconex.collector.http.db.ICrawlURLDatabase
Whether a URL has been deleted.
isVanished(CrawlURL) - Method in class com.norconex.collector.http.db.impl.DerbyCrawlURLDatabase
 
IURLExtractor - Interface in com.norconex.collector.http.handler
Responsible for extracting URLs out of a document.
IURLFilter - Interface in com.norconex.collector.http.filter
Filter a document based on its URL, before any download of HTTP headers or content of a document.
IURLNormalizer - Interface in com.norconex.collector.http.handler
Responsible for normalizing URLs.

L

loadCollectorConfig(File, File) - Static method in class com.norconex.collector.http.HttpCollectorConfigLoader
 
loadCrawlerConfigs(File, File) - Static method in class com.norconex.collector.http.crawler.HttpCrawlerConfigLoader
 
loadCrawlerConfigs(HierarchicalConfiguration) - Static method in class com.norconex.collector.http.crawler.HttpCrawlerConfigLoader
 
loadFromXML(Reader) - Method in class com.norconex.collector.http.filter.impl.ExtensionURLFilter
 
loadFromXML(Reader) - Method in class com.norconex.collector.http.filter.impl.RegexHeaderFilter
 
loadFromXML(Reader) - Method in class com.norconex.collector.http.filter.impl.RegexURLFilter
 
loadFromXML(Reader) - Method in class com.norconex.collector.http.handler.impl.DefaultDelayResolver
 
loadFromXML(Reader) - Method in class com.norconex.collector.http.handler.impl.DefaultDocumentFetcher
 
loadFromXML(Reader) - Method in class com.norconex.collector.http.handler.impl.DefaultHttpClientInitializer
 
loadFromXML(Reader) - Method in class com.norconex.collector.http.handler.impl.DefaultHttpDocumentChecksummer
 
loadFromXML(Reader) - Method in class com.norconex.collector.http.handler.impl.DefaultHttpHeadersChecksummer
 
loadFromXML(Reader) - Method in class com.norconex.collector.http.handler.impl.DefaultURLExtractor
 
loadFromXML(Reader) - Method in class com.norconex.collector.http.handler.impl.GenericURLNormalizer
 
loadFromXML(Reader) - Method in class com.norconex.collector.http.handler.impl.SimpleHttpHeadersFetcher
 

M

main(String[]) - Static method in class com.norconex.collector.http.HttpCollector
Invokes the HTTP Collector from the command line.

N

next() - Method in interface com.norconex.collector.http.db.ICrawlURLDatabase
Returns the next URL to be processed and marks it as being "active".
next() - Method in class com.norconex.collector.http.db.impl.DerbyCrawlURLDatabase
 
normalizeURL(String) - Method in class com.norconex.collector.http.handler.impl.GenericURLNormalizer
 
normalizeURL(String) - Method in interface com.norconex.collector.http.handler.IURLNormalizer
Normalize the given URL.

P

PathUtils - Class in com.norconex.collector.http.util
 
processDocument(DefaultHttpClient, HttpDocument) - Method in interface com.norconex.collector.http.handler.IHttpDocumentProcessor
Processes a document.
processed(CrawlURL) - Method in interface com.norconex.collector.http.db.ICrawlURLDatabase
Marks this URL as processed.
processed(CrawlURL) - Method in class com.norconex.collector.http.db.impl.DerbyCrawlURLDatabase
 
processURL() - Method in interface com.norconex.collector.http.crawler.URLProcessor.IURLProcessingStep
 
processURL() - Method in class com.norconex.collector.http.crawler.URLProcessor
 

Q

queue(String, int) - Method in interface com.norconex.collector.http.db.ICrawlURLDatabase
Queues a URL for future processing.
queue(String, int) - Method in class com.norconex.collector.http.db.impl.DerbyCrawlURLDatabase
 
queueCache() - Method in interface com.norconex.collector.http.db.ICrawlURLDatabase
Queues URLs cached from a previous run so they can be processed again.
queueCache() - Method in class com.norconex.collector.http.db.impl.DerbyCrawlURLDatabase
 

R

REFERNCED_URLS - Static variable in class com.norconex.collector.http.doc.HttpMetadata
 
RegexHeaderFilter - Class in com.norconex.collector.http.filter.impl
Accepts or rejects one or more HTTP header values using a regular expression.
RegexHeaderFilter() - Constructor for class com.norconex.collector.http.filter.impl.RegexHeaderFilter
 
RegexHeaderFilter(String, String) - Constructor for class com.norconex.collector.http.filter.impl.RegexHeaderFilter
 
RegexHeaderFilter(String, String, OnMatch) - Constructor for class com.norconex.collector.http.filter.impl.RegexHeaderFilter
 
RegexHeaderFilter(String, String, OnMatch, boolean) - Constructor for class com.norconex.collector.http.filter.impl.RegexHeaderFilter
 
RegexURLFilter - Class in com.norconex.collector.http.filter.impl
Filters URLs based on a regular expression.
RegexURLFilter() - Constructor for class com.norconex.collector.http.filter.impl.RegexURLFilter
 
RegexURLFilter(String) - Constructor for class com.norconex.collector.http.filter.impl.RegexURLFilter
 
RegexURLFilter(String, OnMatch) - Constructor for class com.norconex.collector.http.filter.impl.RegexURLFilter
 
RegexURLFilter(String, OnMatch, boolean) - Constructor for class com.norconex.collector.http.filter.impl.RegexURLFilter
 
resumeExecution(JobProgress, JobSuite) - Method in class com.norconex.collector.http.crawler.HttpCrawler
 
RobotsMeta - Class in com.norconex.collector.http.robot
 
RobotsMeta() - Constructor for class com.norconex.collector.http.robot.RobotsMeta
 
RobotsTxt - Class in com.norconex.collector.http.robot
 
RobotsTxt(IURLFilter[]) - Constructor for class com.norconex.collector.http.robot.RobotsTxt
 
RobotsTxt(IURLFilter[], float) - Constructor for class com.norconex.collector.http.robot.RobotsTxt
 

S

saveToXML(Writer) - Method in class com.norconex.collector.http.filter.impl.ExtensionURLFilter
 
saveToXML(Writer) - Method in class com.norconex.collector.http.filter.impl.RegexHeaderFilter
 
saveToXML(Writer) - Method in class com.norconex.collector.http.filter.impl.RegexURLFilter
 
saveToXML(Writer) - Method in class com.norconex.collector.http.handler.impl.DefaultDelayResolver
 
saveToXML(Writer) - Method in class com.norconex.collector.http.handler.impl.DefaultDocumentFetcher
 
saveToXML(Writer) - Method in class com.norconex.collector.http.handler.impl.DefaultHttpClientInitializer
 
saveToXML(Writer) - Method in class com.norconex.collector.http.handler.impl.DefaultHttpDocumentChecksummer
 
saveToXML(Writer) - Method in class com.norconex.collector.http.handler.impl.DefaultHttpHeadersChecksummer
 
saveToXML(Writer) - Method in class com.norconex.collector.http.handler.impl.DefaultURLExtractor
 
saveToXML(Writer) - Method in class com.norconex.collector.http.handler.impl.GenericURLNormalizer
 
saveToXML(Writer) - Method in class com.norconex.collector.http.handler.impl.SimpleHttpHeadersFetcher
 
setAuthMethod(String) - Method in class com.norconex.collector.http.handler.impl.DefaultHttpClientInitializer
 
setAuthPassword(String) - Method in class com.norconex.collector.http.handler.impl.DefaultHttpClientInitializer
 
setAuthPasswordField(String) - Method in class com.norconex.collector.http.handler.impl.DefaultHttpClientInitializer
 
setAuthURL(String) - Method in class com.norconex.collector.http.handler.impl.DefaultHttpClientInitializer
 
setAuthUsername(String) - Method in class com.norconex.collector.http.handler.impl.DefaultHttpClientInitializer
 
setAuthUsernameField(String) - Method in class com.norconex.collector.http.handler.impl.DefaultHttpClientInitializer
 
setCaseSensitive(boolean) - Method in class com.norconex.collector.http.filter.impl.ExtensionURLFilter
 
setCaseSensitive(boolean) - Method in class com.norconex.collector.http.filter.impl.RegexHeaderFilter
 
setCaseSensitive(boolean) - Method in class com.norconex.collector.http.filter.impl.RegexURLFilter
 
setCommitter(ICommitter) - Method in class com.norconex.collector.http.crawler.HttpCrawlerConfig
 
setConfigurationFile(File) - Method in class com.norconex.collector.http.HttpCollector
 
setCookiesDisabled(boolean) - Method in class com.norconex.collector.http.handler.impl.DefaultHttpClientInitializer
 
setCrawlerConfigs(HttpCrawlerConfig[]) - Method in class com.norconex.collector.http.HttpCollectorConfig
 
setCrawlerListeners(IHttpCrawlerEventListener[]) - Method in class com.norconex.collector.http.crawler.HttpCrawlerConfig
 
setCrawlers(HttpCrawler[]) - Method in class com.norconex.collector.http.HttpCollector
 
setCrawlURLDatabaseFactory(ICrawlURLDatabaseFactory) - Method in class com.norconex.collector.http.crawler.HttpCrawlerConfig
 
setDefaultDelay(long) - Method in class com.norconex.collector.http.handler.impl.DefaultDelayResolver
Sets the default delay in milliseconds.
setDelayResolver(IDelayResolver) - Method in class com.norconex.collector.http.crawler.HttpCrawlerConfig
 
setDeleteOrphans(boolean) - Method in class com.norconex.collector.http.crawler.HttpCrawlerConfig
 
setDepth(int) - Method in class com.norconex.collector.http.crawler.CrawlURL
 
setDocChecksum(String) - Method in class com.norconex.collector.http.crawler.CrawlURL
 
setExtensions(String) - Method in class com.norconex.collector.http.filter.impl.ExtensionURLFilter
 
setField(String) - Method in class com.norconex.collector.http.handler.impl.DefaultHttpDocumentChecksummer
 
setField(String) - Method in class com.norconex.collector.http.handler.impl.DefaultHttpHeadersChecksummer
 
setHeadChecksum(String) - Method in class com.norconex.collector.http.crawler.CrawlURL
 
setHeader(String) - Method in class com.norconex.collector.http.filter.impl.RegexHeaderFilter
 
setHeadersPrefix(String) - Method in class com.norconex.collector.http.handler.impl.DefaultDocumentFetcher
 
setHeadersPrefix(String) - Method in class com.norconex.collector.http.handler.impl.SimpleHttpHeadersFetcher
 
setHttpClientInitializer(IHttpClientInitializer) - Method in class com.norconex.collector.http.crawler.HttpCrawlerConfig
 
setHttpDocumentChecksummer(IHttpDocumentChecksummer) - Method in class com.norconex.collector.http.crawler.HttpCrawlerConfig
 
setHttpDocumentFetcher(IHttpDocumentFetcher) - Method in class com.norconex.collector.http.crawler.HttpCrawlerConfig
 
setHttpDocumentfilters(IHttpDocumentFilter[]) - Method in class com.norconex.collector.http.crawler.HttpCrawlerConfig
 
setHttpHeadersChecksummer(IHttpHeadersChecksummer) - Method in class com.norconex.collector.http.crawler.HttpCrawlerConfig
 
setHttpHeadersFetcher(IHttpHeadersFetcher) - Method in class com.norconex.collector.http.crawler.HttpCrawlerConfig
 
setHttpHeadersFilters(IHttpHeadersFilter[]) - Method in class com.norconex.collector.http.crawler.HttpCrawlerConfig
 
setId(String) - Method in class com.norconex.collector.http.crawler.HttpCrawlerConfig
 
setId(String) - Method in class com.norconex.collector.http.HttpCollectorConfig
 
setIgnoreRobotsCrawlDelay(boolean) - Method in class com.norconex.collector.http.handler.impl.DefaultDelayResolver
 
setIgnoreRobotsTxt(boolean) - Method in class com.norconex.collector.http.crawler.HttpCrawlerConfig
 
setImporterConfig(ImporterConfig) - Method in class com.norconex.collector.http.crawler.HttpCrawlerConfig
 
setKeepDownloads(boolean) - Method in class com.norconex.collector.http.crawler.HttpCrawlerConfig
 
setLogsDir(String) - Method in class com.norconex.collector.http.HttpCollectorConfig
 
setMaxDepth(int) - Method in class com.norconex.collector.http.crawler.HttpCrawlerConfig
 
setMaxURLLength(int) - Method in class com.norconex.collector.http.handler.impl.DefaultURLExtractor
 
setMaxURLs(int) - Method in class com.norconex.collector.http.crawler.HttpCrawlerConfig
 
setNormalizations(GenericURLNormalizer.Normalization...) - Method in class com.norconex.collector.http.handler.impl.GenericURLNormalizer
 
setNumThreads(int) - Method in class com.norconex.collector.http.crawler.HttpCrawlerConfig
 
setPostImportProcessors(IHttpDocumentProcessor[]) - Method in class com.norconex.collector.http.crawler.HttpCrawlerConfig
 
setPreImportProcessors(IHttpDocumentProcessor[]) - Method in class com.norconex.collector.http.crawler.HttpCrawlerConfig
 
setProgressDir(String) - Method in class com.norconex.collector.http.HttpCollectorConfig
 
setProxyHost(String) - Method in class com.norconex.collector.http.handler.impl.DefaultHttpClientInitializer
 
setProxyPassword(String) - Method in class com.norconex.collector.http.handler.impl.DefaultHttpClientInitializer
 
setProxyPort(int) - Method in class com.norconex.collector.http.handler.impl.DefaultHttpClientInitializer
 
setProxyRealm(String) - Method in class com.norconex.collector.http.handler.impl.DefaultHttpClientInitializer
 
setProxyUsername(String) - Method in class com.norconex.collector.http.handler.impl.DefaultHttpClientInitializer
 
setRegex(String) - Method in class com.norconex.collector.http.filter.impl.RegexHeaderFilter
 
setRegex(String) - Method in class com.norconex.collector.http.filter.impl.RegexURLFilter
 
setReplaces(GenericURLNormalizer.Replace...) - Method in class com.norconex.collector.http.handler.impl.GenericURLNormalizer
 
setRobotsTxtProvider(IRobotsTxtProvider) - Method in class com.norconex.collector.http.crawler.HttpCrawlerConfig
 
setSchedules(List<DefaultDelayResolver.DelaySchedule>) - Method in class com.norconex.collector.http.handler.impl.DefaultDelayResolver
 
setStartURLs(String[]) - Method in class com.norconex.collector.http.crawler.HttpCrawlerConfig
 
setStatus(CrawlStatus) - Method in class com.norconex.collector.http.crawler.CrawlURL
 
setUrl(String) - Method in class com.norconex.collector.http.crawler.CrawlURL
 
setUrlExtractor(IURLExtractor) - Method in class com.norconex.collector.http.crawler.HttpCrawlerConfig
 
setURLFilters(IURLFilter[]) - Method in class com.norconex.collector.http.crawler.HttpCrawlerConfig
 
setUrlNormalizer(IURLNormalizer) - Method in class com.norconex.collector.http.crawler.HttpCrawlerConfig
 
setUserAgent(String) - Method in class com.norconex.collector.http.handler.impl.DefaultHttpClientInitializer
 
setValidStatusCodes(int[]) - Method in class com.norconex.collector.http.handler.impl.DefaultDocumentFetcher
 
setValidStatusCodes(int[]) - Method in class com.norconex.collector.http.handler.impl.SimpleHttpHeadersFetcher
 
setVariablesFile(File) - Method in class com.norconex.collector.http.HttpCollector
 
setWorkDir(File) - Method in class com.norconex.collector.http.crawler.HttpCrawlerConfig
 
SimpleHttpHeadersFetcher - Class in com.norconex.collector.http.handler.impl
Basic implementation of IHttpHeadersFetcher.
SimpleHttpHeadersFetcher() - Constructor for class com.norconex.collector.http.handler.impl.SimpleHttpHeadersFetcher
 
SimpleHttpHeadersFetcher(int[]) - Constructor for class com.norconex.collector.http.handler.impl.SimpleHttpHeadersFetcher
 
startExecution(JobProgress, JobSuite) - Method in class com.norconex.collector.http.crawler.HttpCrawler
 
stop(IJobStatus, JobSuite) - Method in class com.norconex.collector.http.crawler.HttpCrawler
 
stop() - Method in class com.norconex.collector.http.HttpCollector
Stops a running instance of this HTTP Collector.

T

toString() - Method in class com.norconex.collector.http.crawler.CrawlURL
 
toString() - Method in class com.norconex.collector.http.filter.impl.ExtensionURLFilter
 
toString() - Method in class com.norconex.collector.http.filter.impl.RegexHeaderFilter
 
toString() - Method in class com.norconex.collector.http.filter.impl.RegexURLFilter
 
toString() - Method in class com.norconex.collector.http.handler.impl.DefaultDelayResolver.DelaySchedule
 
toString() - Method in class com.norconex.collector.http.handler.impl.GenericURLNormalizer.Replace
 
toString() - Method in class com.norconex.collector.http.handler.impl.GenericURLNormalizer
 

U

UNSPECIFIED_CRAWL_DELAY - Static variable in class com.norconex.collector.http.robot.RobotsTxt
 
URLProcessor - Class in com.norconex.collector.http.crawler
Holds the URL processing logic in various processing "steps" for better readability and maintainability.
URLProcessor(HttpCrawler, DefaultHttpClient, ICrawlURLDatabase, File, HttpDocument, CrawlURL) - Constructor for class com.norconex.collector.http.crawler.URLProcessor
 
URLProcessor.IURLProcessingStep - Interface in com.norconex.collector.http.crawler
 
urlToPath(String) - Static method in class com.norconex.collector.http.util.PathUtils
 

V

valueOf(String) - Static method in enum com.norconex.collector.http.crawler.CrawlStatus
Returns the enum constant of this type with the specified name.
valueOf(String) - Static method in enum com.norconex.collector.http.handler.impl.GenericURLNormalizer.Normalization
Returns the enum constant of this type with the specified name.
values() - Static method in enum com.norconex.collector.http.crawler.CrawlStatus
Returns an array containing the constants of this enum type, in the order they are declared.
values() - Static method in enum com.norconex.collector.http.handler.impl.GenericURLNormalizer.Normalization
Returns an array containing the constants of this enum type, in the order they are declared.

A C D E F G H I L M N P Q R S T U V

Copyright © 2009-2013 Norconex Inc. All Rights Reserved.