|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Object
kr.ac.kaist.swrc.jhannanum.plugin.MajorPlugin.MorphAnalyzer.ChartMorphAnalyzer.MorphemeChart
public class MorphemeChart
This class implements the lattice-style morpheme chart, an internal data structure for morphological analysis without backtracking.
Nested Class Summary | |
---|---|
class |
MorphemeChart.Morpheme
A morpheme node in the lattice style chart. |
Field Summary | |
---|---|
private java.lang.String |
bufString
string buffer |
MorphemeChart.Morpheme[] |
chart
the morpheme chart |
int |
chartEnd
the last index of the chart |
private static java.lang.String |
CHI_REPLACE
the reserved word for replacement of Chinese characters |
private int |
chiReplaceIndex
the index for replacement of Chinese characters |
private java.util.LinkedList<java.lang.String> |
chiReplacementList
the list for the replacement of Chinese characters |
private Connection |
connection
the connection rules |
private static java.lang.String |
ENG_REPLACE
the reserved word for replacement of English alphabets |
private int |
engReplaceIndex
the index for replacement of English alphabets |
private java.util.LinkedList<java.lang.String> |
engReplacementList
the list for the replacement of English alphabets |
private Exp |
exp
chart expansion |
private static int |
MAX_CANDIDATE_NUM
the maximum number of analysis results |
private static int |
MAX_MORPHEME_CHART
the maximum number of morpheme nodes in the chart |
private static int |
MAX_MORPHEME_CONNECTION
the maximum number of connections between one morpheme and others |
private static int |
MORPHEME_STATE_FAIL
the processing state - fail |
private static int |
MORPHEME_STATE_INCOMPLETE
the processing state - incomplete |
private static int |
MORPHEME_STATE_SUCCESS
the processing state - success |
private NumberDic |
numDic
number dictionary - automata |
private int |
printResultCnt
the number of analysis results printed |
private java.util.LinkedList<Eojeol> |
resEojeols
the list of eojeols analyzed |
private java.util.ArrayList<java.lang.String> |
resMorphemes
the list of morphemes analyzed |
private java.util.ArrayList<java.lang.String> |
resTags
the list of morpheme tags analyzed |
private int[] |
segmentPath
path of segmentation |
private Simti |
simti
SIMple Trie Index |
private SegmentPosition |
sp
segment position |
private Trie |
systemDic
system morpheme dictionary |
private TagSet |
tagSet
the morpheme tag set |
private Trie |
userDic
user morpheme dictionary |
Constructor Summary | |
---|---|
MorphemeChart(TagSet tagSet,
Connection connection,
Trie systemDic,
Trie userDic,
NumberDic numDic,
Simti simti,
java.util.LinkedList<Eojeol> resEojeolList)
Constructor. |
Method Summary | |
---|---|
int |
addMorpheme(int tag,
int phoneme,
int nextPosition,
int nextTagType)
Adds a new morpheme to the chart. |
int |
altSegment(java.lang.String str)
It inserts the reverse of the given string to the SIMTI data structure. |
int |
analyze()
It performs morphological analysis on the morpheme chart constructed. |
private int |
analyze(int chartIndex,
int tagType)
It performs morphological analysis on the morpheme chart from the specified index in the chart. |
private int |
analyzeUnknown()
It segments all phonemes, tags each segment as 'unknown', and then performs chart analysis, so that eojeols consisting of morphemes not found in the dictionaries can be processed. |
boolean |
checkChart(int[] morpheme,
int morphemeLen,
int tag,
int phoneme,
int nextPosition,
int nextTagType,
java.lang.String str)
Checks whether the specified morpheme exists in the morpheme chart. |
void |
getResult()
Generates the morphological analysis result based on the morpheme chart where the analysis is performed. |
void |
init(java.lang.String word)
Initializes the morpheme chart with the specified word. |
void |
phonemeChange(int from,
java.lang.String front,
java.lang.String back,
int ftag,
int btag,
int phoneme)
It expands the morpheme chart to deal with the phoneme change phenomenon. |
private java.lang.String |
preReplace(java.lang.String str)
Replaces the English alphabets and Chinese characters in the specified string with the reserved words. |
private void |
printChart(int chartIndex)
It generates the final morphological analysis result from the morpheme chart. |
void |
printMorphemeAll()
It prints all the data in the chart to the console. |
Methods inherited from class java.lang.Object |
---|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
Field Detail |
---|
private static final java.lang.String CHI_REPLACE
private static final java.lang.String ENG_REPLACE
private java.util.LinkedList<java.lang.String> chiReplacementList
private java.util.LinkedList<java.lang.String> engReplacementList
private int engReplaceIndex
private int chiReplaceIndex
private static final int MAX_MORPHEME_CONNECTION
private static final int MAX_MORPHEME_CHART
private static final int MORPHEME_STATE_INCOMPLETE
private static final int MORPHEME_STATE_SUCCESS
private static final int MAX_CANDIDATE_NUM
private static final int MORPHEME_STATE_FAIL
public MorphemeChart.Morpheme[] chart
public int chartEnd
private TagSet tagSet
private Connection connection
private SegmentPosition sp
private java.lang.String bufString
private int[] segmentPath
private Exp exp
private Trie systemDic
private Trie userDic
private NumberDic numDic
private Simti simti
private int printResultCnt
private java.util.LinkedList<Eojeol> resEojeols
private java.util.ArrayList<java.lang.String> resMorphemes
private java.util.ArrayList<java.lang.String> resTags
Constructor Detail |
---|
public MorphemeChart(TagSet tagSet, Connection connection, Trie systemDic, Trie userDic, NumberDic numDic, Simti simti, java.util.LinkedList<Eojeol> resEojeolList)
tagSet
- - the morpheme tag set
connection
- - the morpheme connection rules
systemDic
- - the system morpheme dictionary
userDic
- - the user morpheme dictionary
numDic
- - the number dictionary
simti
- - the SIMple Trie Index
resEojeolList
- - the list of eojeols to store the analysis result
Method Detail |
---|
public int addMorpheme(int tag, int phoneme, int nextPosition, int nextTagType)
tag
- - the morpheme tag ID
phoneme
- - phoneme
nextPosition
- - the index of next morpheme
nextTagType
- - the tag type of next morpheme
public int altSegment(java.lang.String str)
str
- - string to insert to the SIMTI structure
public int analyze()
private int analyze(int chartIndex, int tagType)
chartIndex
- - the index of the chart to analyze
tagType
- - the type of next morpheme
private int analyzeUnknown()
public boolean checkChart(int[] morpheme, int morphemeLen, int tag, int phoneme, int nextPosition, int nextTagType, java.lang.String str)
morpheme
- - the list of indices of the morphemes to check
morphemeLen
- - the length of the list
tag
- - morpheme tag ID
phoneme
- - phoneme
nextPosition
- - the index of the next morpheme
nextTagType
- - the type of the next morpheme tag
str
- - plain string
public void getResult()
public void init(java.lang.String word)
word
- - the plain string of an eojeol to analyze
public void phonemeChange(int from, java.lang.String front, java.lang.String back, int ftag, int btag, int phoneme)
from
- - the index of the start segment position
front
- - the front part of the string
back
- - the next part of the string
ftag
- - the morpheme tag of the front part
btag
- - the morpheme tag of the next part
phoneme
- - phoneme
private void printChart(int chartIndex)
chartIndex
- - the start index of the chart to generate final result
public void printMorphemeAll()
private java.lang.String preReplace(java.lang.String str)
str
- - the string to replace English and Chinese characters
|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |