gate.util
Class HtmlLinksExtractor
java.lang.Object
|
+--javax.swing.text.html.HTMLEditorKit.ParserCallback
|
+--gate.util.HtmlLinksExtractor
- public class HtmlLinksExtractor
- extends HTMLEditorKit.ParserCallback
This class extracts links from HTML files.
Implements the behaviour of the HTML reader.
Methods of an object of this class are called by the HTML parser when
parsing events occur.
Method Summary |
void |
flush()
This method is called once, when the HTML parser reaches the end
of its input stream, in order to notify the parser callback that there
is nothing more to parse. |
void |
handleComment(char[] text,
int pos)
This method is called when the HTML parser encounters a comment. |
void |
handleEndTag(HTML.Tag t,
int pos)
This method is called when the HTML parser encounters the end of a tag,
which means that the tag is paired with a beginning tag. |
void |
handleError(String errorMsg,
int pos)
This method is called when the HTML parser encounters an error.
It is up to the programmer whether to deal with that error. |
void |
handleSimpleTag(HTML.Tag t,
MutableAttributeSet a,
int pos)
This method is called when the HTML parser encounters an empty tag. |
void |
handleStartTag(HTML.Tag t,
MutableAttributeSet a,
int pos)
This method is called when the HTML parser encounters the beginning
of a tag, which means that the tag is paired with an end tag and is
not an empty one. |
void |
handleText(char[] text,
int pos)
This method is called when the HTML parser encounters text (PCDATA). |
static void |
main(String[] args)
Extract links from all .html files below a directory |
HtmlLinksExtractor
public HtmlLinksExtractor()
handleStartTag
public void handleStartTag(HTML.Tag t,
MutableAttributeSet a,
int pos)
- This method is called when the HTML parser encounters the beginning
of a tag, which means that the tag is paired with an end tag and is
not an empty one.
- Overrides:
handleStartTag
in class HTMLEditorKit.ParserCallback
handleEndTag
public void handleEndTag(HTML.Tag t,
int pos)
- This method is called when the HTML parser encounters the end of a tag,
which means that the tag is paired with a beginning tag.
- Overrides:
handleEndTag
in class HTMLEditorKit.ParserCallback
handleSimpleTag
public void handleSimpleTag(HTML.Tag t,
MutableAttributeSet a,
int pos)
- This method is called when the HTML parser encounters an empty tag.
- Overrides:
handleSimpleTag
in class HTMLEditorKit.ParserCallback
handleText
public void handleText(char[] text,
int pos)
- This method is called when the HTML parser encounters text (PCDATA).
- Overrides:
handleText
in class HTMLEditorKit.ParserCallback
handleError
public void handleError(String errorMsg,
int pos)
- This method is called when the HTML parser encounters an error.
It is up to the programmer whether to deal with that error.
- Overrides:
handleError
in class HTMLEditorKit.ParserCallback
flush
public void flush()
throws BadLocationException
- This method is called once, when the HTML parser reaches the end
of its input stream, in order to notify the parser callback that there
is nothing more to parse.
- Overrides:
flush
in class HTMLEditorKit.ParserCallback
handleComment
public void handleComment(char[] text,
int pos)
- This method is called when the HTML parser encounters a comment.
- Overrides:
handleComment
in class HTMLEditorKit.ParserCallback
main
public static void main(String[] args)
- Extract links from all .html files below a directory