FudanNLP is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU Lesser General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 |
10 | *
FudanNLP is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU Lesser General Public License for more details.
14 |
15 | *
You should have received a copy of the GNU General Public License
16 | * along with FudanNLP. If not, see
17 | * http://www.gnu.org/licenses/.
18 |
19 | *
Copyright 2009-2012 fnlp.org. All rights reserved.
20 | *
21 | * @author fnlp.org
22 | * @since FudanNLP 1.5
23 | * @version 1.0.0
24 | *
25 | */
26 | package org.fnlp.app.keyword;
--------------------------------------------------------------------------------
/fnlp-app/src/main/java/org/fnlp/app/lucene/POSAttribute.java:
--------------------------------------------------------------------------------
1 | /**
2 | * This file is part of FNLP (formerly FudanNLP).
3 | *
4 | * FNLP is free software: you can redistribute it and/or modify
5 | * it under the terms of the GNU Lesser General Public License as published by
6 | * the Free Software Foundation, either version 3 of the License, or
7 | * (at your option) any later version.
8 | *
9 | * FNLP is distributed in the hope that it will be useful,
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | * GNU Lesser General Public License for more details.
13 | *
14 | * You should have received a copy of the GNU General Public License
15 | * along with FudanNLP. If not, see <http://www.gnu.org/licenses/>.
16 | *
17 | * Copyright 2009-2014 www.fnlp.org. All rights reserved.
18 | */
19 |
20 | package org.fnlp.app.lucene;
21 |
22 | import org.apache.lucene.util.Attribute;
23 | /** Lucene {@link Attribute} that carries a part-of-speech tag as a string. */
24 | public interface POSAttribute extends Attribute {
25 | /** Sets the part-of-speech tag. */
26 | public void setPartOfSpeech(String pos);
27 | /** Returns the part-of-speech tag. */
28 | public String getPartOfSpeech();
29 | }
--------------------------------------------------------------------------------
/fnlp-app/src/main/java/org/fnlp/app/lucene/POSAttributeImpl.java:
--------------------------------------------------------------------------------
1 | /**
2 | * This file is part of FNLP (formerly FudanNLP).
3 | *
4 | * FNLP is free software: you can redistribute it and/or modify
5 | * it under the terms of the GNU Lesser General Public License as published by
6 | * the Free Software Foundation, either version 3 of the License, or
7 | * (at your option) any later version.
8 | *
9 | * FNLP is distributed in the hope that it will be useful,
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | * GNU Lesser General Public License for more details.
13 | *
14 | * You should have received a copy of the GNU General Public License
15 | * along with FudanNLP. If not, see <http://www.gnu.org/licenses/>.
16 | *
17 | * Copyright 2009-2014 www.fnlp.org. All rights reserved.
18 | */
19 |
20 | package org.fnlp.app.lucene;
21 |
22 | import org.apache.lucene.util.AttributeImpl;
23 | /** Default {@link POSAttribute} implementation: holds the tag in a single String field. */
24 | public final class POSAttributeImpl extends AttributeImpl
25 | implements POSAttribute {
26 | /** Current part-of-speech tag; starts as the empty string and is reset to it by clear(). */
27 | private String pos = "";
28 | /** Stores the given tag as-is (no null check or normalisation). */
29 | public void setPartOfSpeech(String pos) {
30 | this.pos = pos;
31 | }
32 | /** Returns the stored tag. */
33 | public String getPartOfSpeech() {
34 | return pos;
35 | }
36 | @Override
37 | public void clear() {
38 | pos = ""; // back to the initial empty tag
39 | }
40 | @Override
41 | public void copyTo(AttributeImpl target) {
42 | ((POSAttribute) target).setPartOfSpeech(pos); // unchecked cast: target must implement POSAttribute
43 | }
44 | }
--------------------------------------------------------------------------------
/fnlp-app/src/main/java/org/fnlp/app/lucene/POSTaggingFilter.java:
--------------------------------------------------------------------------------
1 | /**
2 | * This file is part of FNLP (formerly FudanNLP).
3 | *
4 | * FNLP is free software: you can redistribute it and/or modify
5 | * it under the terms of the GNU Lesser General Public License as published by
6 | * the Free Software Foundation, either version 3 of the License, or
7 | * (at your option) any later version.
8 | *
9 | * FNLP is distributed in the hope that it will be useful,
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | * GNU Lesser General Public License for more details.
13 | *
14 | * You should have received a copy of the GNU General Public License
15 | * along with FudanNLP. If not, see <http://www.gnu.org/licenses/>.
16 | *
17 | * Copyright 2009-2014 www.fnlp.org. All rights reserved.
18 | */
19 |
20 | package org.fnlp.app.lucene;
21 |
22 | import java.io.IOException;
23 |
24 | import org.apache.lucene.analysis.TokenStream;
25 |
26 | import org.fnlp.nlp.cn.Tags;
27 | /** Token filter that keeps a token only when Tags.isStopword(...) is false for its part-of-speech tag. */
28 | public final class POSTaggingFilter extends FilteringTokenFilter {
29 |
30 | /** Part-of-speech attribute obtained via addAttribute; read by accept(). */
31 | private final POSAttribute posAtt = addAttribute(POSAttribute.class);
32 |
33 | /** Passes both arguments unchanged to the FilteringTokenFilter constructor. */
34 | public POSTaggingFilter(boolean enablePositionIncrements, TokenStream in) {
35 | super(enablePositionIncrements, in);
36 | }
37 | /** Returns true (keep the token) unless Tags.isStopword reports the current tag as a stop word. */
38 | @Override
39 | public boolean accept() throws IOException {
40 | String pos = posAtt.getPartOfSpeech();
41 | return !Tags.isStopword(pos);
42 | }
43 | }
--------------------------------------------------------------------------------
/fnlp-app/src/main/java/org/fnlp/app/lucene/WordType.java:
--------------------------------------------------------------------------------
1 | /**
2 | * This file is part of FNLP (formerly FudanNLP).
3 | *
4 | * FNLP is free software: you can redistribute it and/or modify
5 | * it under the terms of the GNU Lesser General Public License as published by
6 | * the Free Software Foundation, either version 3 of the License, or
7 | * (at your option) any later version.
8 | *
9 | * FNLP is distributed in the hope that it will be useful,
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | * GNU Lesser General Public License for more details.
13 | *
14 | * You should have received a copy of the GNU General Public License
15 | * along with FudanNLP. If not, see <http://www.gnu.org/licenses/>.
16 | *
17 | * Copyright 2009-2014 www.fnlp.org. All rights reserved.
18 | */
19 |
20 | package org.fnlp.app.lucene;
21 |
22 | /**
23 | * Integer token-type constants (described by the original author as internal SmartChineseAnalyzer token types).
24 | * @lucene.experimental
25 | */
26 | public class WordType {
27 | // Constants only: the values below are the consecutive integers 0 through 7.
28 | /**
29 | * Start of a sentence.
30 | */
31 | public final static int SENTENCE_BEGIN = 0;
32 |
33 | /**
34 | * End of a sentence.
35 | */
36 | public final static int SENTENCE_END = 1;
37 |
38 | /**
39 | * Chinese word.
40 | */
41 | public final static int CHINESE_WORD = 2;
42 |
43 | /**
44 | * ASCII string.
45 | */
46 | public final static int STRING = 3;
47 |
48 | /**
49 | * ASCII alphanumeric. NOTE(review): the constant is named NUMBER; confirm whether letters are really included.
50 | */
51 | public final static int NUMBER = 4;
52 |
53 | /**
54 | * Punctuation symbol.
55 | */
56 | public final static int DELIMITER = 5;
57 |
58 | /**
59 | * Full-width string.
60 | */
61 | public final static int FULLWIDTH_STRING = 6;
62 |
63 | /**
64 | * Full-width alphanumeric. NOTE(review): named FULLWIDTH_NUMBER; same naming question as NUMBER.
65 | */
66 | public final static int FULLWIDTH_NUMBER = 7;
67 |
68 | }
--------------------------------------------------------------------------------
/fnlp-app/src/main/java/org/fnlp/app/lucene/demo/package-info.java:
--------------------------------------------------------------------------------
1 | /**
2 | *
3 | */
4 | /**
5 | * @author xpqiu
6 | *
7 | */
8 | package org.fnlp.app.lucene.demo;
--------------------------------------------------------------------------------
/fnlp-app/src/main/java/org/fnlp/app/lucene/package-info.java:
--------------------------------------------------------------------------------
1 | /**
2 | *
3 | */
4 | /**
5 | * @author xpqiu
6 | *
7 | */
8 | package org.fnlp.app.lucene;
--------------------------------------------------------------------------------
/fnlp-app/src/main/java/org/fnlp/app/num/package-info.java:
--------------------------------------------------------------------------------
1 | /**
2 | *
3 | */
4 | /**
5 | * @author xpqiu
6 | *
7 | */
8 | package org.fnlp.app.num;
--------------------------------------------------------------------------------
/fnlp-app/src/main/java/org/fnlp/app/tc/package-info.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Text classification package.
3 | *
This file is part of FudanNLP.
4 |
5 | *
FudanNLP is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU Lesser General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 |
10 | *
FudanNLP is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU Lesser General Public License for more details.
14 |
15 | *
You should have received a copy of the GNU General Public License
16 | * along with FudanNLP. If not, see
17 | * http://www.gnu.org/licenses/.
18 |
19 | *
Copyright 2009-2012 fnlp.org. All rights reserved.
20 | *
21 | * @author fnlp.org
22 | * @since FudanNLP 1.5
23 | * @version 1.0.0
24 | *
25 | */
26 | package org.fnlp.app.tc;
--------------------------------------------------------------------------------
/fnlp-core/.classpath:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
--------------------------------------------------------------------------------
/fnlp-core/.project:
--------------------------------------------------------------------------------
1 |
2 |
3 | fnlp-core
4 |
5 |
6 |
7 |
8 |
9 | org.eclipse.wst.common.project.facet.core.builder
10 |
11 |
12 |
13 |
14 | org.eclipse.jdt.core.javabuilder
15 |
16 |
17 |
18 |
19 | org.eclipse.wst.validation.validationbuilder
20 |
21 |
22 |
23 |
24 | org.eclipse.m2e.core.maven2Builder
25 |
26 |
27 |
28 |
29 |
30 | org.eclipse.jem.workbench.JavaEMFNature
31 | org.eclipse.wst.common.modulecore.ModuleCoreNature
32 | org.eclipse.m2e.core.maven2Nature
33 | org.eclipse.jdt.core.javanature
34 | org.eclipse.wst.common.project.facet.core.nature
35 |
36 |
37 |
--------------------------------------------------------------------------------
/fnlp-core/.settings/org.eclipse.core.resources.prefs:
--------------------------------------------------------------------------------
1 | eclipse.preferences.version=1
2 | encoding//src/main/java=UTF-8
3 | encoding//src/test/java=UTF-8
4 | encoding/=UTF-8
5 |
--------------------------------------------------------------------------------
/fnlp-core/.settings/org.eclipse.jdt.core.prefs:
--------------------------------------------------------------------------------
1 | #Tue Mar 11 13:39:47 CST 2014
2 | encoding/src/test/java=UTF-8
3 | org.eclipse.jdt.core.compiler.debug.localVariable=generate
4 | org.eclipse.jdt.core.compiler.compliance=1.6
5 | org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve
6 | encoding/src/main/resources=UTF-8
7 | org.eclipse.jdt.core.compiler.debug.sourceFile=generate
8 | org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
9 | encoding/src/main/java=UTF-8
10 | org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.6
11 | org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
12 | org.eclipse.jdt.core.compiler.debug.lineNumber=generate
13 | eclipse.preferences.version=1
14 | encoding/src/test/resources=UTF-8
15 | org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
16 | org.eclipse.jdt.core.compiler.source=1.6
17 | org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
18 |
--------------------------------------------------------------------------------
/fnlp-core/.settings/org.eclipse.m2e.core.prefs:
--------------------------------------------------------------------------------
1 | activeProfiles=
2 | eclipse.preferences.version=1
3 | resolveWorkspaceProjects=true
4 | version=1
5 |
--------------------------------------------------------------------------------
/fnlp-core/src/main/java/org/fnlp/data/reader/ListReader.java:
--------------------------------------------------------------------------------
1 | /**
2 | * This file is part of FNLP (formerly FudanNLP).
3 | *
4 | * FNLP is free software: you can redistribute it and/or modify
5 | * it under the terms of the GNU Lesser General Public License as published by
6 | * the Free Software Foundation, either version 3 of the License, or
7 | * (at your option) any later version.
8 | *
9 | * FNLP is distributed in the hope that it will be useful,
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | * GNU Lesser General Public License for more details.
13 | *
14 | * You should have received a copy of the GNU General Public License
15 | * along with FudanNLP. If not, see <http://www.gnu.org/licenses/>.
16 | *
17 | * Copyright 2009-2014 www.fnlp.org. All rights reserved.
18 | */
19 |
20 | package org.fnlp.data.reader;
21 |
22 | import java.util.List;
23 |
24 | import org.fnlp.ml.types.Instance;
25 | /** Reader over an array of Lists: each array element becomes the data of one Instance. */
26 | public class ListReader extends Reader{
27 |
28 | // Written as a temporary helper for testing coreference resolution (jszhao).
29 | List[] data;
30 | int index;
31 | /** Keeps a reference to the given array (no copy) and starts reading at position 0. */
32 | public ListReader (List[] data)
33 | {
34 | this.data = data;
35 | this.index = 0;
36 | }
37 | /** Wraps data[index] in a new Instance (second constructor argument null) and advances the position. */
38 | public Instance next ()
39 | {
40 | return new Instance (data[index++], null); // no bounds check: past the end this throws ArrayIndexOutOfBoundsException
41 | }
42 | /** True while the position is still inside the array. */
43 | public boolean hasNext () { return index < data.length; }
44 |
45 |
46 | }
--------------------------------------------------------------------------------
/fnlp-core/src/main/java/org/fnlp/data/reader/Reader.java:
--------------------------------------------------------------------------------
1 | /**
2 | * This file is part of FNLP (formerly FudanNLP).
3 | *
4 | * FNLP is free software: you can redistribute it and/or modify
5 | * it under the terms of the GNU Lesser General Public License as published by
6 | * the Free Software Foundation, either version 3 of the License, or
7 | * (at your option) any later version.
8 | *
9 | * FNLP is distributed in the hope that it will be useful,
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | * GNU Lesser General Public License for more details.
13 | *
14 | * You should have received a copy of the GNU General Public License
15 | * along with FudanNLP. If not, see <http://www.gnu.org/licenses/>.
16 | *
17 | * Copyright 2009-2014 www.fnlp.org. All rights reserved.
18 | */
19 |
20 | package org.fnlp.data.reader;
21 |
22 | import java.util.Iterator;
23 |
24 | import org.fnlp.ml.types.Instance;
25 | import org.fnlp.ml.types.InstanceSet;
26 |
27 | /**
28 | * Data-input abstraction: data is read sequentially through an iterator, one Instance per call,
29 | * so that data processing is independent of how the data is read.
30 | * (The original comment named the package as edu.fudan.data.reader; it is now org.fnlp.data.reader.)
31 | * @author xpqiu
32 | * @version 1.0
33 | */
34 | public abstract class Reader implements Iterator {
35 | // NOTE(review): "Instance inst = next()" below needs Iterator<Instance>; the type argument appears to be missing from this listing.
36 | public void remove () { // removal is unsupported: always throws
37 | throw new IllegalStateException ("This Iterator does not support remove().");
38 | }
39 |
40 | /** Drains this reader into a new InstanceSet, skipping any null instances, and returns the set. */
41 | public InstanceSet read(){
42 | InstanceSet instSet = new InstanceSet();
43 | while (hasNext()) {
44 | Instance inst = next();
45 | if(inst!=null){
46 | instSet.add(inst);
47 | }
48 | }
49 | return instSet;
50 | }
51 | }
--------------------------------------------------------------------------------
/fnlp-core/src/main/java/org/fnlp/data/reader/StringReader.java:
--------------------------------------------------------------------------------
1 | /**
2 | * This file is part of FNLP (formerly FudanNLP).
3 | *
4 | * FNLP is free software: you can redistribute it and/or modify
5 | * it under the terms of the GNU Lesser General Public License as published by
6 | * the Free Software Foundation, either version 3 of the License, or
7 | * (at your option) any later version.
8 | *
9 | * FNLP is distributed in the hope that it will be useful,
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | * GNU Lesser General Public License for more details.
13 | *
14 | * You should have received a copy of the GNU General Public License
15 | * along with FudanNLP. If not, see <http://www.gnu.org/licenses/>.
16 | *
17 | * Copyright 2009-2014 www.fnlp.org. All rights reserved.
18 | */
19 |
20 | package org.fnlp.data.reader;
21 |
22 | import org.fnlp.ml.types.Instance;
23 |
24 | /**
25 | * Reader over a string array: each element is one sample, with no class-label information.
26 | * @author xpqiu
27 | * @version 1.0
28 | * StringReader
29 | * (The original comment named the package as edu.fudan.ml.data; it is now org.fnlp.data.reader.)
30 | */
31 | public class StringReader extends Reader
32 | {
33 | String[] data;
34 | int index;
35 | /** Keeps a reference to the given array (no copy) and starts reading at position 0. */
36 | public StringReader (String[] data)
37 | {
38 | this.data = data;
39 | this.index = 0;
40 | }
41 | /** Wraps data[index] in a new Instance (second constructor argument null) and advances the position. */
42 | public Instance next ()
43 | {
44 | return new Instance (data[index++], null); // no bounds check: past the end this throws ArrayIndexOutOfBoundsException
45 | }
46 | /** True while the position is still inside the array. */
47 | public boolean hasNext () { return index < data.length; }
48 |
49 |
50 |
51 | }
--------------------------------------------------------------------------------
/fnlp-core/src/main/java/org/fnlp/data/reader/package-info.java:
--------------------------------------------------------------------------------
1 | /**
2 | * This file is part of FNLP (formerly FudanNLP).
3 | *
4 | * FNLP is free software: you can redistribute it and/or modify
5 | * it under the terms of the GNU Lesser General Public License as published by
6 | * the Free Software Foundation, either version 3 of the License, or
7 | * (at your option) any later version.
8 | *
9 | * FNLP is distributed in the hope that it will be useful,
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | * GNU Lesser General Public License for more details.
13 | *
14 | * You should have received a copy of the GNU General Public License
15 | * along with FudanNLP. If not, see <http://www.gnu.org/licenses/>.
16 | *
17 | * Copyright 2009-2014 www.fnlp.org. All rights reserved.
18 | */
19 |
20 | /**
21 | * Data reading package: handles data in different types of formats.
22 | */
23 | package org.fnlp.data.reader;
--------------------------------------------------------------------------------
/fnlp-core/src/main/java/org/fnlp/ml/classifier/TPredict.java:
--------------------------------------------------------------------------------
1 | /**
2 | * This file is part of FNLP (formerly FudanNLP).
3 | *
4 | * FNLP is free software: you can redistribute it and/or modify
5 | * it under the terms of the GNU Lesser General Public License as published by
6 | * the Free Software Foundation, either version 3 of the License, or
7 | * (at your option) any later version.
8 | *
9 | * FNLP is distributed in the hope that it will be useful,
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | * GNU Lesser General Public License for more details.
13 | *
14 | * You should have received a copy of the GNU General Public License
15 | * along with FudanNLP. If not, see <http://www.gnu.org/licenses/>.
16 | *
17 | * Copyright 2009-2014 www.fnlp.org. All rights reserved.
18 | */
19 |
20 | package org.fnlp.ml.classifier;
21 |
22 | /**
23 | * Interface for prediction results.
24 | * @author xpqiu
25 | * @version 2.0
26 | * @since 1.5
27 | */
28 | public interface TPredict { // NOTE(review): T is used below but no type parameter is declared here; it appears to be missing from this listing
29 | /**
30 | * Returns a predicted label.
31 | * @param i position
32 | * @return the i-th predicted label, or null if it does not exist
33 | */
34 | public T getLabel(int i);
35 | /**
36 | * Returns the score of a predicted label.
37 | * @param i position
38 | * @return the score of the i-th prediction; documented as Double.NEGATIVE_INFINITY when absent (NOTE(review): return type is float; confirm the sentinel)
39 | */
40 | public float getScore(int i);
41 | /**
42 | * Normalizes the scores.
43 | */
44 | public void normalize();
45 | /**
46 | * Number of predictions.
47 | * @return the number of predicted results
48 | */
49 | public int size();
50 | /**
51 | * Returns all labels.
52 | * @return all predicted labels
53 | */
54 | public T[] getLabels();
55 | /**
56 | * Removes the information stored at position i.
57 | * @param i position to remove
58 | */
59 | public void remove(int i);
60 |
61 |
62 |
63 | }
--------------------------------------------------------------------------------
/fnlp-core/src/main/java/org/fnlp/ml/classifier/bayes/BayesTrainer.java:
--------------------------------------------------------------------------------
1 | package org.fnlp.ml.classifier.bayes;
2 |
3 | import gnu.trove.iterator.TIntFloatIterator;
4 |
5 | import java.util.List;
6 |
7 | import org.fnlp.ml.classifier.AbstractClassifier;
8 | import org.fnlp.ml.classifier.linear.AbstractTrainer;
9 | import org.fnlp.ml.types.Instance;
10 | import org.fnlp.ml.types.InstanceSet;
11 | import org.fnlp.ml.types.alphabet.AlphabetFactory;
12 | import org.fnlp.ml.types.sv.HashSparseVector;
13 | import org.fnlp.nlp.pipe.Pipe;
14 | import org.fnlp.nlp.pipe.SeriesPipes;
15 | /**
16 | * Trainer for the Bayes text classification model.
17 | * The input training data is a sparse matrix.
18 | * @author sywu
19 | * Both train overloads return a BayesClassifier typed as AbstractClassifier.
20 | */
21 | public class BayesTrainer{
22 | /** Trains using the training set's own alphabet factory and pipes, after calling removeTargetPipe() on those pipes. */
23 | public AbstractClassifier train(InstanceSet trainset) {
24 | AlphabetFactory af=trainset.getAlphabetFactory();
25 | SeriesPipes pp=(SeriesPipes) trainset.getPipes(); // unchecked cast: the set's pipe must be a SeriesPipes
26 | pp.removeTargetPipe(); // invoked on the object returned by trainset.getPipes() (NOTE(review): confirm whether that is a copy)
27 | return train(trainset,af,pp);
28 | }
29 | public AbstractClassifier train(InstanceSet trainset,AlphabetFactory af,Pipe pp) { // builds an ItemFrequency from trainset and af, then wires af, pp and it into a new BayesClassifier
30 | ItemFrequency tf=new ItemFrequency(trainset,af);
31 | BayesClassifier classifier=new BayesClassifier();
32 | classifier.setFactory(af);
33 | classifier.setPipe(pp);
34 | classifier.setTf(tf);
35 | return classifier;
36 | }
37 | }
38 |
--------------------------------------------------------------------------------
/fnlp-core/src/main/java/org/fnlp/ml/classifier/hier/Statistic.java:
--------------------------------------------------------------------------------
1 | /**
2 | * This file is part of FNLP (formerly FudanNLP).
3 | *
4 | * FNLP is free software: you can redistribute it and/or modify
5 | * it under the terms of the GNU Lesser General Public License as published by
6 | * the Free Software Foundation, either version 3 of the License, or
7 | * (at your option) any later version.
8 | *
9 | * FNLP is distributed in the hope that it will be useful,
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | * GNU Lesser General Public License for more details.
13 | *
14 | * You should have received a copy of the GNU General Public License
15 | * along with FudanNLP. If not, see <http://www.gnu.org/licenses/>.
16 | *
17 | * Copyright 2009-2014 www.fnlp.org. All rights reserved.
18 | */
19 |
20 | package org.fnlp.ml.classifier.hier;
21 | /** Placeholder class: it contains only an empty main method. */
22 | public class Statistic {
23 | /**
24 | * Entry point; the body is empty, so running it does nothing.
25 | * @param args command-line arguments (unused)
26 | */
27 | public static void main(String[] args) {
28 |
29 |
30 | }
31 |
32 | }
--------------------------------------------------------------------------------
/fnlp-core/src/main/java/org/fnlp/ml/classifier/knn/VotePredict.java:
--------------------------------------------------------------------------------
1 | package org.fnlp.ml.classifier.knn;
2 |
3 | import java.util.HashMap;
4 | import java.util.Map;
5 |
6 | import org.fnlp.ml.classifier.Predict;
7 |
8 | public class VotePredict extends Predict {
9 |
10 | public VotePredict(int k){
11 | super(k);
12 | }
13 | public T getLabel() {
14 | T label=labels[0];
15 | int count=0;
16 | Map labelCount = new HashMap();
17 | for(int pos=0;poscount){
29 | count=tempCount;
30 | label=labels[pos];
31 | }
32 | }
33 | return label;
34 | }
35 | public Predict getNLabels(int labels_num){
36 | Predict pred=new Predict(labels_num);
37 |
38 | Map labelCount = new HashMap();
39 | for(int i=0;iThis file is part of FudanNLP.
4 |
5 | *
FudanNLP is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU Lesser General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 |
10 | *
FudanNLP is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU Lesser General Public License for more details.
14 |
15 | *
You should have received a copy of the GNU General Public License
16 | * along with FudanNLP. If not, see
17 | * http://www.gnu.org/licenses/.
18 |
19 | *
Copyright 2009-2012 fnlp.org. All rights reserved.
20 | *
21 | * @author fnlp.org
22 | * @since FudanNLP 1.5
23 | * @version 1.0.0
24 | */
25 | package org.fnlp.ml.classifier.knn;
--------------------------------------------------------------------------------
/fnlp-core/src/main/java/org/fnlp/ml/classifier/linear/AbstractTrainer.java:
--------------------------------------------------------------------------------
1 | /**
2 | * This file is part of FNLP (formerly FudanNLP).
3 | *
4 | * FNLP is free software: you can redistribute it and/or modify
5 | * it under the terms of the GNU Lesser General Public License as published by
6 | * the Free Software Foundation, either version 3 of the License, or
7 | * (at your option) any later version.
8 | *
9 | * FNLP is distributed in the hope that it will be useful,
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | * GNU Lesser General Public License for more details.
13 | *
14 | * You should have received a copy of the GNU General Public License
15 | * along with FudanNLP. If not, see <http://www.gnu.org/licenses/>.
16 | *
17 | * Copyright 2009-2014 www.fnlp.org. All rights reserved.
18 | */
19 |
20 | package org.fnlp.ml.classifier.linear;
21 |
22 | import org.fnlp.ml.classifier.AbstractClassifier;
23 | import org.fnlp.ml.types.InstanceSet;
24 |
25 | /**
26 | * Abstract base class for parameter trainers.
27 | * @author Feng Ji
28 | *
29 | */
30 | public abstract class AbstractTrainer {
31 |
32 | /**
33 | * Abstract parameter-training method.
34 | * @param trainset the training data set
35 | * @param devset the data set used to evaluate performance; may be null
36 | * @return the classifier
37 | */
38 | public abstract AbstractClassifier train(InstanceSet trainset, InstanceSet devset);
39 |
40 | /**
41 | * Parameter-training method without a development set; delegates to train(trainset, null).
42 | * @param trainset the training data set
43 | * @return the classifier
44 | */
45 | public AbstractClassifier train(InstanceSet trainset){
46 | return train(trainset,null);
47 | }
48 |
49 | /**
50 | * Performance-evaluation method.
51 | * @param devset the data set used to evaluate performance
52 | */
53 | protected abstract void evaluate(InstanceSet devset);
54 |
55 | }
--------------------------------------------------------------------------------
/fnlp-core/src/main/java/org/fnlp/ml/classifier/linear/inf/Inferencer.java:
--------------------------------------------------------------------------------
1 | /**
2 | * This file is part of FNLP (formerly FudanNLP).
3 | *
4 | * FNLP is free software: you can redistribute it and/or modify
5 | * it under the terms of the GNU Lesser General Public License as published by
6 | * the Free Software Foundation, either version 3 of the License, or
7 | * (at your option) any later version.
8 | *
9 | * FNLP is distributed in the hope that it will be useful,
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | * GNU Lesser General Public License for more details.
13 | *
14 | * You should have received a copy of the GNU General Public License
15 | * along with FudanNLP. If not, see <http://www.gnu.org/licenses/>.
16 | *
17 | * Copyright 2009-2014 www.fnlp.org. All rights reserved.
18 | */
19 |
20 | package org.fnlp.ml.classifier.linear.inf;
21 |
22 | import java.io.Serializable;
23 |
24 | import org.fnlp.ml.classifier.TPredict;
25 | import org.fnlp.ml.types.Instance;
26 |
27 | /**
28 | * Inference class.
29 | * @author xpqiu
30 | * Holds a weight array and a use-target flag; subclasses supply the getBest implementations.
31 | */
32 | public abstract class Inferencer implements Serializable {
33 |
34 | private static final long serialVersionUID = -7254946709189008567L;
35 | /** Weight array; exposed and replaced by reference through getWeights/setWeights (no copying). */
36 | protected float[] weights;
37 | /** Flag assigned by isUseTarget(boolean); it is not read anywhere in this class. */
38 | protected boolean isUseTarget;
39 |
40 | /**
41 | * Original note: "gets the top n most likely predictions". NOTE(review): this overload has no n; the note seems to describe getBest(inst, n).
42 | * @param inst the instance to predict for
43 | * @return the prediction result
44 | * Sep 9, 2009
45 | */
46 | public abstract TPredict getBest(Instance inst);
47 | /** Overload taking a count n (presumably the number of predictions to return; confirm in subclasses). */
48 | public abstract TPredict getBest(Instance inst, int n);
49 | /** Returns the internal weight array itself, not a copy. */
50 | public float[] getWeights() {
51 | return weights;
52 | }
53 | /** Replaces the weight array with the given reference. */
54 | public void setWeights(float[] weights) {
55 | this.weights = weights;
56 | }
57 | /** Despite the is-prefix this is a setter: it assigns the isUseTarget flag. */
58 | public void isUseTarget(boolean b) {
59 | isUseTarget = b;
60 | }
61 | }
--------------------------------------------------------------------------------
/fnlp-core/src/main/java/org/fnlp/ml/classifier/linear/inf/package-info.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Decoder package, used together with the classifiers in org.fnlp.ml.classifier.linear.
3 | *
4 | *
This file is part of FudanNLP.
5 |
6 | *
FudanNLP is free software: you can redistribute it and/or modify
7 | * it under the terms of the GNU Lesser General Public License as published by
8 | * the Free Software Foundation, either version 3 of the License, or
9 | * (at your option) any later version.
10 |
11 | *
FudanNLP is distributed in the hope that it will be useful,
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | * GNU Lesser General Public License for more details.
15 |
16 | *
You should have received a copy of the GNU General Public License
17 | * along with FudanNLP. If not, see
18 | * http://www.gnu.org/licenses/.
19 |
20 | *
Copyright 2009-2012 fnlp.org. All rights reserved.
21 | *
22 | * @author fnlp.org
23 | * @since FudanNLP 1.5
24 | * @version 1.0.0
25 | *
26 | */
27 | package org.fnlp.ml.classifier.linear.inf;
--------------------------------------------------------------------------------
/fnlp-core/src/main/java/org/fnlp/ml/classifier/linear/update/LinearMaxPAUpdate.java:
--------------------------------------------------------------------------------
1 | /**
2 | * This file is part of FNLP (formerly FudanNLP).
3 | *
4 | * FNLP is free software: you can redistribute it and/or modify
5 | * it under the terms of the GNU Lesser General Public License as published by
6 | * the Free Software Foundation, either version 3 of the License, or
7 | * (at your option) any later version.
8 | *
9 | * FNLP is distributed in the hope that it will be useful,
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | * GNU Lesser General Public License for more details.
13 | *
14 | * You should have received a copy of the GNU General Public License
15 | * along with FudanNLP. If not, see <http://www.gnu.org/licenses/>.
16 | *
17 | * Copyright 2009-2014 www.fnlp.org. All rights reserved.
18 | */
19 |
20 | package org.fnlp.ml.classifier.linear.update;
21 |
22 | import org.fnlp.ml.loss.Loss;
23 | import org.fnlp.ml.types.Instance;
24 |
25 | /**
26 | * Parameter-update class for linear classification, using the PA algorithm (presumably Passive-Aggressive; confirm).
27 | */
28 | public class LinearMaxPAUpdate extends AbstractPAUpdate {
29 | /** Passes the loss function to the AbstractPAUpdate constructor. */
30 | public LinearMaxPAUpdate(Loss loss) {
31 | super(loss);
32 | }
33 | /** Records the gold-vs-predicted feature difference in diffv and diffw (declared outside this class); always returns 1. */
34 | @Override
35 | protected int diff(Instance inst, float[] weights, Object target,
36 | Object predict) {
37 | // Instance data is cast to int[]; the gold label comes from target, or from inst.getTarget() when target is null.
38 | int[] data = (int[]) inst.getData();
39 | int gold;
40 | if (target == null)
41 | gold = (Integer) inst.getTarget();
42 | else
43 | gold = (Integer) target;
44 | int pred = (Integer) predict;
45 | // Entries equal to -1 are skipped; for the rest, entry + label is used directly as an index into weights.
46 | for (int i = 0; i < data.length; i++) {
47 | if (data[i] != -1) {
48 | int ts = data[i] + gold;
49 | int ps = data[i] + pred;
50 | diffv.put(ts, 1.0f); // NOTE(review): if put() replaces rather than accumulates, a repeated index overwrites the earlier entry
51 | diffv.put(ps, -1.0f);
52 | diffw += weights[ts]-weights[ps]; // w^T(f(x,y)-f(x,ybar))
53 | }
54 | }
55 | // The return value is the constant 1 (NOTE(review): its meaning is defined by AbstractPAUpdate; confirm there).
56 | return 1;
57 | }
58 |
59 | }
--------------------------------------------------------------------------------
/fnlp-core/src/main/java/org/fnlp/ml/classifier/linear/update/Update.java:
--------------------------------------------------------------------------------
1 | /**
2 | * This file is part of FNLP (formerly FudanNLP).
3 | *
4 | * FNLP is free software: you can redistribute it and/or modify
5 | * it under the terms of the GNU Lesser General Public License as published by
6 | * the Free Software Foundation, either version 3 of the License, or
7 | * (at your option) any later version.
8 | *
9 | * FNLP is distributed in the hope that it will be useful,
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | * GNU Lesser General Public License for more details.
13 | *
14 | * You should have received a copy of the GNU General Public License
15 | * along with FudanNLP. If not, see http://www.gnu.org/licenses/.
16 | *
17 | * Copyright 2009-2014 www.fnlp.org. All rights reserved.
18 | */
19 |
20 | package org.fnlp.ml.classifier.linear.update;
21 |
22 | import org.fnlp.ml.types.Instance;
23 |
24 | public interface Update {
25 |
26 | /**
27 | *
28 | * @param inst 样本实例
29 | * @param weights 权重
30 | * @param k 目前遍历的样本数
31 | * @param extraweight 平均化感知器需要减去的权重
32 | * @param predictLabel 预测类别
33 | * @param c 步长阈值
34 | * @return 预测类别和真实类别之间的损失
35 | */
36 | public float update(Instance inst, float[] weights, int k, float[] extraweight, Object predictLabel,
37 | float c);
38 |
39 | /**
40 | *
41 | * @param inst 样本实例
42 | * @param weights 权重
43 | * @param k 目前遍历的样本数
44 | * @param extraweight 平均化感知器需要减去的权重
45 | * @param predictLabel 预测类别
46 | * @param goldenLabel 真实类别
47 | * @param c 步长阈值
48 | * @return 预测类别和真实类别之间的损失
49 | */
50 | public float update(Instance inst, float[] weights, int k, float[] extraweight, Object predictLabel,
51 | Object goldenLabel, float c);
52 |
53 | }
--------------------------------------------------------------------------------
/fnlp-core/src/main/java/org/fnlp/ml/classifier/linear/update/package-info.java:
--------------------------------------------------------------------------------
1 | /**
2 | * 在线学习权重调整,配合 edu.fudan.ml.classifier.linear中的分类器使用
3 | *
This file is part of FudanNLP.
4 |
5 | *
FudanNLP is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU Lesser General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 |
10 | *
FudanNLP is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU Lesser General Public License for more details.
14 |
15 | *
You should have received a copy of the GNU General Public License
16 | * along with FudanNLP. If not, see
17 | * http://www.gnu.org/licenses/.
18 |
19 | *
FudanNLP is free software: you can redistribute it and/or modify
7 | * it under the terms of the GNU Lesser General Public License as published by
8 | * the Free Software Foundation, either version 3 of the License, or
9 | * (at your option) any later version.
10 |
11 | *
FudanNLP is distributed in the hope that it will be useful,
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | * GNU Lesser General Public License for more details.
15 |
16 | *
You should have received a copy of the GNU General Public License
17 | * along with FudanNLP. If not, see
18 | * http://www.gnu.org/licenses/.
19 |
20 | *
FudanNLP is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU Lesser General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 |
10 | *
FudanNLP is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU Lesser General Public License for more details.
14 |
15 | *
You should have received a copy of the GNU General Public License
16 | * along with FudanNLP. If not, see
17 | * http://www.gnu.org/licenses/.
18 |
19 | *
Copyright 2009-2012 fnlp.org. All rights reserved.
20 | *
21 | * @author fnlp.org
22 | * @since FudanNLP 1.5
23 | * @version 1.0.0
24 | *
25 | */
26 | package org.fnlp.ml.classifier.struct.update;
--------------------------------------------------------------------------------
/fnlp-core/src/main/java/org/fnlp/ml/eval/ScoreUsage.java:
--------------------------------------------------------------------------------
1 | package org.fnlp.ml.eval;
2 |
3 | import java.io.IOException;
4 |
5 | import org.fnlp.util.MyFiles;
6 |
7 | public class ScoreUsage {
8 |
9 | public static void main(String[] args) throws IOException {
10 |
11 | Score ss = new Score();
12 |
13 | int numofclass = 10;
14 |
15 | String str = MyFiles.loadString("../tmp/Sogou_SVM");
16 |
17 | String[] s = str.split("\n");
18 | Integer[] golden= new Integer[s.length];
19 | Integer[] pred = new Integer[s.length];
20 | for (int i = 0; i < s.length; i++) {
21 | String[] ele = s[i].split("\\s");
22 | int g = Integer.parseInt(ele[0]);
23 | int p = Integer.parseInt(ele[1]);
24 | golden[i] = g;
25 | pred[i] = p;
26 | }
27 | String res = ss.score(pred, golden, numofclass);
28 | System.out.println(res);
29 |
30 | }
31 |
32 | }
33 |
--------------------------------------------------------------------------------
/fnlp-core/src/main/java/org/fnlp/ml/eval/package-info.java:
--------------------------------------------------------------------------------
1 | /**
2 | * 分类结果评测包.
3 | *
This file is part of FudanNLP.
4 |
5 | *
FudanNLP is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU Lesser General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 |
10 | *
FudanNLP is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU Lesser General Public License for more details.
14 |
15 | *
You should have received a copy of the GNU General Public License
16 | * along with FudanNLP. If not, see
17 | * http://www.gnu.org/licenses/.
18 |
19 | *
Copyright 2009-2012 fnlp.org. All rights reserved.
20 | *
21 | * @author fnlp.org
22 | * @since FudanNLP 1.5
23 | * @version 1.0.0
24 | *
25 | */
26 | package org.fnlp.ml.eval;
--------------------------------------------------------------------------------
/fnlp-core/src/main/java/org/fnlp/ml/feature/BaseGenerator.java:
--------------------------------------------------------------------------------
1 | /**
2 | * This file is part of FNLP (formerly FudanNLP).
3 | *
4 | * FNLP is free software: you can redistribute it and/or modify
5 | * it under the terms of the GNU Lesser General Public License as published by
6 | * the Free Software Foundation, either version 3 of the License, or
7 | * (at your option) any later version.
8 | *
9 | * FNLP is distributed in the hope that it will be useful,
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | * GNU Lesser General Public License for more details.
13 | *
14 | * You should have received a copy of the GNU General Public License
15 | * along with FudanNLP. If not, see http://www.gnu.org/licenses/.
16 | *
17 | * Copyright 2009-2014 www.fnlp.org. All rights reserved.
18 | */
19 |
20 | package org.fnlp.ml.feature;
21 |
22 | import org.fnlp.ml.types.Instance;
23 | import org.fnlp.ml.types.sv.HashSparseVector;
24 | import org.fnlp.ml.types.sv.ISparseVector;
25 |
26 | /**
27 | * 简单将data返回 特征不包含类别信息
28 | *
29 | * @author xpqiu
30 | *
31 | */
32 | public class BaseGenerator extends Generator {
33 |
34 | private static final long serialVersionUID = 5209575930740335391L;
35 |
36 |
37 | public ISparseVector getVector(Instance inst) {
38 |
39 | return (ISparseVector) inst.getData();
40 | }
41 |
42 | public ISparseVector getVector(Instance inst, Object object) {
43 | return getVector(inst);
44 | }
45 | }
--------------------------------------------------------------------------------
/fnlp-core/src/main/java/org/fnlp/ml/feature/Generator.java:
--------------------------------------------------------------------------------
1 | /**
2 | * This file is part of FNLP (formerly FudanNLP).
3 | *
4 | * FNLP is free software: you can redistribute it and/or modify
5 | * it under the terms of the GNU Lesser General Public License as published by
6 | * the Free Software Foundation, either version 3 of the License, or
7 | * (at your option) any later version.
8 | *
9 | * FNLP is distributed in the hope that it will be useful,
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | * GNU Lesser General Public License for more details.
13 | *
14 | * You should have received a copy of the GNU General Public License
15 | * along with FudanNLP. If not, see http://www.gnu.org/licenses/.
16 | *
17 | * Copyright 2009-2014 www.fnlp.org. All rights reserved.
18 | */
19 |
20 | package org.fnlp.ml.feature;
21 |
22 | import java.io.Serializable;
23 |
24 | import org.fnlp.ml.types.Instance;
25 | import org.fnlp.ml.types.sv.HashSparseVector;
26 | import org.fnlp.ml.types.sv.ISparseVector;
27 |
28 | /**
29 | * 生成特征向量,包含类别信息
30 | *
31 | * @author xpqiu
32 | * @version 1.0
33 | */
34 | public abstract class Generator implements Serializable {
35 |
36 | private static final long serialVersionUID = 8640098825477722199L;
37 |
38 | public Generator() {
39 | }
40 |
41 | public ISparseVector getVector(Instance inst) {
42 | return getVector(inst, inst.getTarget());
43 | }
44 |
45 | public abstract ISparseVector getVector(Instance inst, Object object);
46 |
47 | }
--------------------------------------------------------------------------------
/fnlp-core/src/main/java/org/fnlp/ml/feature/SFGenerator.java:
--------------------------------------------------------------------------------
1 | /**
2 | * This file is part of FNLP (formerly FudanNLP).
3 | *
4 | * FNLP is free software: you can redistribute it and/or modify
5 | * it under the terms of the GNU Lesser General Public License as published by
6 | * the Free Software Foundation, either version 3 of the License, or
7 | * (at your option) any later version.
8 | *
9 | * FNLP is distributed in the hope that it will be useful,
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | * GNU Lesser General Public License for more details.
13 | *
14 | * You should have received a copy of the GNU General Public License
15 | * along with FudanNLP. If not, see http://www.gnu.org/licenses/.
16 | *
17 | * Copyright 2009-2014 www.fnlp.org. All rights reserved.
18 | */
19 |
20 | package org.fnlp.ml.feature;
21 |
22 | import org.fnlp.ml.feature.Generator;
23 | import org.fnlp.ml.types.Instance;
24 | import org.fnlp.ml.types.sv.BinarySparseVector;
25 | import org.fnlp.ml.types.sv.HashSparseVector;
26 | import org.fnlp.ml.types.sv.ISparseVector;
27 | import org.fnlp.ml.types.sv.SparseVector;
28 |
29 | /**
30 | * 结构化特征生成类
31 | *
32 | * @version Feb 16, 2009
33 | */
34 | public class SFGenerator extends Generator {
35 |
36 | private static final long serialVersionUID = 6404015214630864081L;
37 |
38 | /**
39 | * 构造函数
40 | */
41 | public SFGenerator() {
42 | }
43 |
44 | @Override
45 | public ISparseVector getVector(Instance inst, Object label) {
46 | int[] data = (int[]) inst.getData();
47 | ISparseVector fv = new BinarySparseVector(data.length);
48 | for(int i = 0; i < data.length; i++) {
49 | int idx = data[i]+(Integer)label;
50 | fv.put(idx);
51 | }
52 | return fv;
53 | }
54 | }
--------------------------------------------------------------------------------
/fnlp-core/src/main/java/org/fnlp/ml/loss/Loss.java:
--------------------------------------------------------------------------------
1 | /**
2 | * This file is part of FNLP (formerly FudanNLP).
3 | *
4 | * FNLP is free software: you can redistribute it and/or modify
5 | * it under the terms of the GNU Lesser General Public License as published by
6 | * the Free Software Foundation, either version 3 of the License, or
7 | * (at your option) any later version.
8 | *
9 | * FNLP is distributed in the hope that it will be useful,
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | * GNU Lesser General Public License for more details.
13 | *
14 | * You should have received a copy of the GNU General Public License
15 | * along with FudanNLP. If not, see http://www.gnu.org/licenses/.
16 | *
17 | * Copyright 2009-2014 www.fnlp.org. All rights reserved.
18 | */
19 |
20 | package org.fnlp.ml.loss;
21 |
public interface Loss {

    /**
     * Computes the loss between {@code l1} and {@code l2}.
     *
     * @param l1 first object
     * @param l2 second object
     * @return the loss
     */
    float calc(Object l1, Object l2);
}
--------------------------------------------------------------------------------
/fnlp-core/src/main/java/org/fnlp/ml/loss/ZeroOneLoss.java:
--------------------------------------------------------------------------------
1 | /**
2 | * This file is part of FNLP (formerly FudanNLP).
3 | *
4 | * FNLP is free software: you can redistribute it and/or modify
5 | * it under the terms of the GNU Lesser General Public License as published by
6 | * the Free Software Foundation, either version 3 of the License, or
7 | * (at your option) any later version.
8 | *
9 | * FNLP is distributed in the hope that it will be useful,
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | * GNU Lesser General Public License for more details.
13 | *
14 | * You should have received a copy of the GNU General Public License
15 | * along with FudanNLP. If not, see http://www.gnu.org/licenses/.
16 | *
17 | * Copyright 2009-2014 www.fnlp.org. All rights reserved.
18 | */
19 |
20 | package org.fnlp.ml.loss;
21 |
22 | public class ZeroOneLoss implements Loss {
23 |
24 | private float calc(Integer i1, Integer i2) {
25 | return i1==i2?0:1;
26 | }
27 |
28 | private float calc(String l1, String l2) {
29 | return l1.equals(l2)?0:1;
30 | }
31 |
32 | public float calc(Object l1, Object l2) {
33 | if (!l1.getClass().equals(l2.getClass())) {
34 | throw new IllegalArgumentException("Exception in ZeroOneLoss: l1 and l2 have different types");
35 | }
36 |
37 | float ret = 0;
38 | if (l1 instanceof Integer) {
39 | ret = calc((Integer)l1, (Integer)l2);
40 | }else if (l1 instanceof String) {
41 | ret = calc((String)l1, (String)l2);
42 | }
43 |
44 | return ret;
45 | }
46 |
47 | }
--------------------------------------------------------------------------------
/fnlp-core/src/main/java/org/fnlp/ml/loss/package-info.java:
--------------------------------------------------------------------------------
1 | /**
2 | * 损失计算函数.
3 | *
This file is part of FudanNLP.
4 |
5 | *
FudanNLP is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU Lesser General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 |
10 | *
FudanNLP is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU Lesser General Public License for more details.
14 |
15 | *
You should have received a copy of the GNU General Public License
16 | * along with FudanNLP. If not, see
17 | * http://www.gnu.org/licenses/.
18 |
19 | *
Copyright 2009-2012 fnlp.org. All rights reserved.
20 | *
21 | * @author fnlp.org
22 | * @since FudanNLP 1.5
23 | * @version 1.0.0
24 | *
25 | */
26 | package org.fnlp.ml.loss;
--------------------------------------------------------------------------------
/fnlp-core/src/main/java/org/fnlp/ml/loss/struct/HybridHammingLoss.java:
--------------------------------------------------------------------------------
1 | /**
2 | * This file is part of FNLP (formerly FudanNLP).
3 | *
4 | * FNLP is free software: you can redistribute it and/or modify
5 | * it under the terms of the GNU Lesser General Public License as published by
6 | * the Free Software Foundation, either version 3 of the License, or
7 | * (at your option) any later version.
8 | *
9 | * FNLP is distributed in the hope that it will be useful,
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | * GNU Lesser General Public License for more details.
13 | *
14 | * You should have received a copy of the GNU General Public License
15 | * along with FudanNLP. If not, see http://www.gnu.org/licenses/.
16 | *
17 | * Copyright 2009-2014 www.fnlp.org. All rights reserved.
18 | */
19 |
20 | package org.fnlp.ml.loss.struct;
21 |
22 | import org.fnlp.ml.loss.struct.HammingLoss;
23 |
24 | /**
25 | * 计算双链的Hamming距离
26 | * @author Feng Ji
27 | *
28 | */
29 | public class HybridHammingLoss extends HammingLoss {
30 |
31 | /**
32 | * 计算o1和o2之间的Hamming距离,o1和o2必须是同类型的对象
33 | * @param o1 对象1(支持二维整型数组)
34 | * @param o2 对象2(支持二维整型数组)
35 | * @return Hamming距离
36 | */
37 | @Override
38 | public float calc(Object o1, Object o2) {
39 | if (!o1.getClass().equals(o2.getClass()))
40 | throw new IllegalArgumentException("Exception in HybridHammingLoss: o1 and o2 have different types");
41 |
42 | int[][] l1 = (int[][]) o1;
43 | int[][] l2 = (int[][]) o2;
44 | int ne = 0;
45 | for (int i = 0; i < l1[0].length; i++) {
46 | for (int j = 0; j < l1.length; j++) {
47 | if (l1[j][i] != l2[j][i]) {
48 | ne++;
49 | break;
50 | }
51 | }
52 | }
53 | return ne;
54 | }
55 | }
--------------------------------------------------------------------------------
/fnlp-core/src/main/java/org/fnlp/ml/loss/struct/SequenceLoss.java:
--------------------------------------------------------------------------------
1 | /**
2 | * This file is part of FNLP (formerly FudanNLP).
3 | *
4 | * FNLP is free software: you can redistribute it and/or modify
5 | * it under the terms of the GNU Lesser General Public License as published by
6 | * the Free Software Foundation, either version 3 of the License, or
7 | * (at your option) any later version.
8 | *
9 | * FNLP is distributed in the hope that it will be useful,
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | * GNU Lesser General Public License for more details.
13 | *
14 | * You should have received a copy of the GNU General Public License
15 | * along with FudanNLP. If not, see http://www.gnu.org/licenses/.
16 | *
17 | * Copyright 2009-2014 www.fnlp.org. All rights reserved.
18 | */
19 |
20 | package org.fnlp.ml.loss.struct;
21 |
22 | import org.fnlp.ml.loss.Loss;
23 |
24 | public class SequenceLoss implements Loss {
25 | /**
26 | *
27 | * @author xpqiu
28 | *
29 | */
30 | public static enum Type {
31 | POINT, EDGE
32 | }
33 |
34 | Type type;
35 |
36 | public SequenceLoss(Type type) {
37 | this.type = type;
38 | }
39 |
40 | public float calc(Object o1, Object o2) {
41 |
42 | float errCount = 0;
43 | if (o1 instanceof int[] && o2 instanceof int[]) {
44 | int[] pred = (int[]) o1;
45 | int[] gold = (int[]) o2;
46 |
47 | if (type == Type.POINT) {
48 | for (int i = 0; i < pred.length; i++) {
49 | if (pred[i] != gold[i])
50 | errCount++;
51 | }
52 | }else if (type == Type.EDGE) {
53 | for (int i = 1; i < pred.length; i++) {
54 | if (pred[i - 1] != gold[i - 1] || pred[i] != gold[i])
55 | errCount++;
56 | }
57 | }
58 | }
59 |
60 | return errCount;
61 | }
62 |
63 | }
--------------------------------------------------------------------------------
/fnlp-core/src/main/java/org/fnlp/ml/loss/struct/package-info.java:
--------------------------------------------------------------------------------
1 | /**
2 | * 针对结构化分类结果的损失计算函数.
3 | *
This file is part of FudanNLP.
4 |
5 | *
FudanNLP is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU Lesser General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 |
10 | *
FudanNLP is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU Lesser General Public License for more details.
14 |
15 | *
You should have received a copy of the GNU General Public License
16 | * along with FudanNLP. If not, see
17 | * http://www.gnu.org/licenses/.
18 |
19 | *
FudanNLP is free software: you can redistribute it and/or modify
12 | it under the terms of the GNU Lesser General Public License as published by
13 | the Free Software Foundation, either version 3 of the License, or
14 | (at your option) any later version.
15 |
16 |
FudanNLP is distributed in the hope that it will be useful,
17 | but WITHOUT ANY WARRANTY; without even the implied warranty of
18 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 | GNU Lesser General Public License for more details.
20 |
21 |
You should have received a copy of the GNU General Public License
22 | along with FudanNLP. If not, see
23 | http://www.gnu.org/licenses/.
24 |
25 |
Copyright 2009-2012 fnlp.org. All rights reserved.
26 |
27 |
28 |
--------------------------------------------------------------------------------
/fnlp-core/src/main/java/org/fnlp/ml/types/DynamicInfo.java:
--------------------------------------------------------------------------------
1 | /**
2 | * This file is part of FNLP (formerly FudanNLP).
3 | *
4 | * FNLP is free software: you can redistribute it and/or modify
5 | * it under the terms of the GNU Lesser General Public License as published by
6 | * the Free Software Foundation, either version 3 of the License, or
7 | * (at your option) any later version.
8 | *
9 | * FNLP is distributed in the hope that it will be useful,
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | * GNU Lesser General Public License for more details.
13 | *
14 | * You should have received a copy of the GNU General Public License
15 | * along with FudanNLP. If not, see http://www.gnu.org/licenses/.
16 | *
17 | * Copyright 2009-2014 www.fnlp.org. All rights reserved.
18 | */
19 |
20 | package org.fnlp.ml.types;
21 |
/**
 * Mutable holder for a part-of-speech tag, a word and a length.
 */
public class DynamicInfo {
    private String pos;
    private String word;
    private int len;

    /**
     * @param pos  part-of-speech tag
     * @param word word text
     * @param len  length value stored alongside the word
     */
    public DynamicInfo(String pos, String word, int len) {
        setPos(pos);
        setWord(word);
        setLen(len);
    }

    /** @return the part-of-speech tag */
    public String getPos() {
        return this.pos;
    }

    /** @param pos the part-of-speech tag to store */
    public final void setPos(String pos) {
        this.pos = pos;
    }

    /** @return the word text */
    public String getWord() {
        return this.word;
    }

    /** @param word the word text to store */
    public final void setWord(String word) {
        this.word = word;
    }

    /** @return the stored length */
    public int getLen() {
        return this.len;
    }

    /** @param len the length to store */
    public final void setLen(int len) {
        this.len = len;
    }

    /**
     * @return {@code word/pos/len}; null fields are rendered as "null"
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder();
        text.append(word).append("/");
        text.append(pos).append("/");
        text.append(len);
        return text.toString();
    }

}
--------------------------------------------------------------------------------
/fnlp-core/src/main/java/org/fnlp/ml/types/alphabet/ILabelAlphabet.java:
--------------------------------------------------------------------------------
1 | /**
2 | * This file is part of FNLP (formerly FudanNLP).
3 | *
4 | * FNLP is free software: you can redistribute it and/or modify
5 | * it under the terms of the GNU Lesser General Public License as published by
6 | * the Free Software Foundation, either version 3 of the License, or
7 | * (at your option) any later version.
8 | *
9 | * FNLP is distributed in the hope that it will be useful,
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | * GNU Lesser General Public License for more details.
13 | *
14 | * You should have received a copy of the GNU General Public License
15 | * along with FudanNLP. If not, see http://www.gnu.org/licenses/.
16 | *
17 | * Copyright 2009-2014 www.fnlp.org. All rights reserved.
18 | */
19 |
20 | package org.fnlp.ml.types.alphabet;
21 |
22 |
23 |
24 | /**
25 | * 标记词典,以自增的方式存放标记
26 | * @version 1.0
27 | *
28 | */
29 | public interface ILabelAlphabet extends IAlphabet {
30 |
31 |
32 | /**
33 | * 查找索引编号对应的标记
34 | * @param id 索引编号
35 | * @return 标记
36 | */
37 | public T lookupString(int id);
38 |
39 | /**
40 | * 查找一组编号对应的标记
41 | * @param ids 索引编号数组
42 | * @return 标记数组
43 | */
44 | public T[] lookupString(int[] ids);
45 |
46 | }
--------------------------------------------------------------------------------
/fnlp-core/src/main/java/org/fnlp/ml/types/alphabet/package-info.java:
--------------------------------------------------------------------------------
1 | /**
2 | * 特征字典.
3 | *
This file is part of FudanNLP.
4 |
5 | *
FudanNLP is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU Lesser General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 |
10 | *
FudanNLP is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU Lesser General Public License for more details.
14 |
15 | *
You should have received a copy of the GNU General Public License
16 | * along with FudanNLP. If not, see
17 | * http://www.gnu.org/licenses/.
18 |
19 | *
Copyright 2009-2012 fnlp.org. All rights reserved.
20 | *
21 | * @author fnlp.org
22 | * @since FudanNLP 1.6
23 | * @version 1.0.0
24 | */
25 | package org.fnlp.ml.types.alphabet;
--------------------------------------------------------------------------------
/fnlp-core/src/main/java/org/fnlp/ml/types/featurecluster/AbstractCluster.java:
--------------------------------------------------------------------------------
1 | /**
2 | * This file is part of FNLP (formerly FudanNLP).
3 | *
4 | * FNLP is free software: you can redistribute it and/or modify
5 | * it under the terms of the GNU Lesser General Public License as published by
6 | * the Free Software Foundation, either version 3 of the License, or
7 | * (at your option) any later version.
8 | *
9 | * FNLP is distributed in the hope that it will be useful,
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | * GNU Lesser General Public License for more details.
13 | *
14 | * You should have received a copy of the GNU General Public License
15 | * along with FudanNLP. If not, see http://www.gnu.org/licenses/.
16 | *
17 | * Copyright 2009-2014 www.fnlp.org. All rights reserved.
18 | */
19 |
20 | package org.fnlp.ml.types.featurecluster;
21 |
22 | import java.util.HashMap;
23 |
/**
 * Base class for feature clustering implementations.
 */
public abstract class AbstractCluster {

    /**
     * Returns the map produced by this clusterer.
     * NOTE(review): the map's key and value types are not visible here; the
     * raw type is kept as declared - confirm against implementations.
     *
     * @return the result map
     */
    public abstract HashMap getMap();

    /**
     * Runs the clustering.
     */
    public abstract void process();
}
--------------------------------------------------------------------------------
/fnlp-core/src/main/java/org/fnlp/ml/types/featurecluster/AbstractDistance.java:
--------------------------------------------------------------------------------
1 | /**
2 | * This file is part of FNLP (formerly FudanNLP).
3 | *
4 | * FNLP is free software: you can redistribute it and/or modify
5 | * it under the terms of the GNU Lesser General Public License as published by
6 | * the Free Software Foundation, either version 3 of the License, or
7 | * (at your option) any later version.
8 | *
9 | * FNLP is distributed in the hope that it will be useful,
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | * GNU Lesser General Public License for more details.
13 | *
14 | * You should have received a copy of the GNU General Public License
15 | * along with FudanNLP. If not, see http://www.gnu.org/licenses/.
16 | *
17 | * Copyright 2009-2014 www.fnlp.org. All rights reserved.
18 | */
19 |
20 | package org.fnlp.ml.types.featurecluster;
21 |
22 | public abstract class AbstractDistance {
23 | public abstract double cal(ClassData cd1, ClassData cd2);
24 | }
--------------------------------------------------------------------------------
/fnlp-core/src/main/java/org/fnlp/ml/types/package.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Insert title here
6 |
7 |
8 |
基本数据类型。
9 |
This file is part of FudanNLP.
10 |
11 |
FudanNLP is free software: you can redistribute it and/or modify
12 | it under the terms of the GNU Lesser General Public License as published by
13 | the Free Software Foundation, either version 3 of the License, or
14 | (at your option) any later version.
15 |
16 |
FudanNLP is distributed in the hope that it will be useful,
17 | but WITHOUT ANY WARRANTY; without even the implied warranty of
18 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 | GNU Lesser General Public License for more details.
20 |
21 |
You should have received a copy of the GNU General Public License
22 | along with FudanNLP. If not, see
23 | http://www.gnu.org/licenses/.
24 |
25 |
Copyright 2009-2012 fnlp.org. All rights reserved.
26 |
27 |
28 |
--------------------------------------------------------------------------------
/fnlp-core/src/main/java/org/fnlp/ml/types/sv/ISparseVector.java:
--------------------------------------------------------------------------------
1 | /**
2 | * This file is part of FNLP (formerly FudanNLP).
3 | *
4 | * FNLP is free software: you can redistribute it and/or modify
5 | * it under the terms of the GNU Lesser General Public License as published by
6 | * the Free Software Foundation, either version 3 of the License, or
7 | * (at your option) any later version.
8 | *
9 | * FNLP is distributed in the hope that it will be useful,
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | * GNU Lesser General Public License for more details.
13 | *
14 | * You should have received a copy of the GNU General Public License
15 | * along with FudanNLP. If not, see http://www.gnu.org/licenses/.
16 | *
17 | * Copyright 2009-2014 www.fnlp.org. All rights reserved.
18 | */
19 |
20 | package org.fnlp.ml.types.sv;
21 |
22 | import java.io.Serializable;
23 |
24 |
25 | /**
26 | * 稀疏向量,并实现各种向量运算
27 | *
28 | */
29 | public interface ISparseVector extends Serializable {
30 |
31 | /**
32 | * 点积
33 | * @param vector
34 | * @return
35 | */
36 | public float dotProduct(float[] vector);
37 |
38 | /**
39 | *
40 | * @param sv
41 | * @return
42 | */
43 | public float dotProduct(HashSparseVector sv);
44 |
45 | /**
46 | * 增加元素
47 | */
48 | public void put(int i);
49 | /**
50 | * 增加多个元素
51 | */
52 | public void put(int[] idx);
53 | /**
54 | * L2模
55 | */
56 | public float l2Norm2();
57 |
58 | }
--------------------------------------------------------------------------------
/fnlp-core/src/main/java/org/fnlp/ml/types/sv/Vector.java:
--------------------------------------------------------------------------------
1 | /**
2 | * This file is part of FNLP (formerly FudanNLP).
3 | *
4 | * FNLP is free software: you can redistribute it and/or modify
5 | * it under the terms of the GNU Lesser General Public License as published by
6 | * the Free Software Foundation, either version 3 of the License, or
7 | * (at your option) any later version.
8 | *
9 | * FNLP is distributed in the hope that it will be useful,
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | * GNU Lesser General Public License for more details.
13 | *
14 | * You should have received a copy of the GNU General Public License
15 | * along with FudanNLP. If not, see http://www.gnu.org/licenses/.
16 | *
17 | * Copyright 2009-2014 www.fnlp.org. All rights reserved.
18 | */
19 |
20 | package org.fnlp.ml.types.sv;
21 |
22 |
23 | /**
24 | * 一般向量,只是封装为统一接口
25 | * @author Xipeng
26 | *
27 | */
28 | public class Vector implements ISparseVector {
29 |
30 | private static final long serialVersionUID = -7805496876863128028L;
31 |
32 | float[] data;
33 |
34 | public Vector(int size){
35 | data = new float[size];
36 | }
37 |
38 | public Vector(float[] data){
39 | this.data = data;
40 | }
41 |
42 | @Override
43 | public float dotProduct(float[] vector) {
44 | System.out.println("未实现");
45 | return 0;
46 | }
47 |
48 | @Override
49 | public float dotProduct(HashSparseVector sv) {
50 | return sv.dotProduct(data);
51 | }
52 |
53 | @Override
54 | public void put(int i) {
55 | System.out.println("未实现");
56 |
57 | }
58 |
59 | @Override
60 | public void put(int[] idx) {
61 | System.out.println("未实现");
62 |
63 | }
64 |
65 | @Override
66 | public float l2Norm2() {
67 | // TODO Auto-generated method stub
68 | return 0;
69 | }
70 |
71 | }
--------------------------------------------------------------------------------
/fnlp-core/src/main/java/org/fnlp/nlp/cn/Tags.java:
--------------------------------------------------------------------------------
1 | /**
2 | * This file is part of FNLP (formerly FudanNLP).
3 | *
4 | * FNLP is free software: you can redistribute it and/or modify
5 | * it under the terms of the GNU Lesser General Public License as published by
6 | * the Free Software Foundation, either version 3 of the License, or
7 | * (at your option) any later version.
8 | *
9 | * FNLP is distributed in the hope that it will be useful,
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | * GNU Lesser General Public License for more details.
13 | *
14 | * You should have received a copy of the GNU General Public License
15 | * along with FudanNLP. If not, see <http://www.gnu.org/licenses/>.
16 | *
17 | * Copyright 2009-2014 www.fnlp.org. All rights reserved.
18 | */
19 |
20 | package org.fnlp.nlp.cn;
21 |
22 | import java.util.regex.Pattern;
23 |
24 |
/**
 * Helper predicates over Chinese part-of-speech tag names.
 *
 * @author xpqiu
 * @version 1.0
 * @since FudanNLP 1.5
 */
public class Tags {

    /** Tag fragments counted as nouns: noun, person name, place name, organization name, proper name. */
    static Pattern nounsPattern = Pattern.compile("名词|人名|地名|机构名|专有名");

    /**
     * Tells whether a part-of-speech tag denotes a noun.
     *
     * @param pos part-of-speech tag; may be {@code null}
     * @return {@code true} if the tag contains one of the noun fragments,
     *         {@code false} otherwise (including for {@code null})
     */
    public static boolean isNoun(String pos) {
        // A missing tag cannot be a noun; avoids an NPE from Pattern.matcher(null).
        if (pos == null) {
            return false;
        }
        return nounsPattern.matcher(pos).find();
    }

    /**
     * Tag fragments counted as stop words: any pronoun, punctuation, preposition,
     * subordinating conjunction, modal particle, interjection, structural
     * particle, onomatopoeia and locative word.
     */
    static Pattern stopwordPattern = Pattern.compile(".*代词|标点|介词|从属连词|语气词|叹词|结构助词|拟声词|方位词");

    /**
     * Tells whether a part-of-speech tag belongs to a word class that carries
     * no content (a stop word).
     *
     * @param pos part-of-speech tag; may be {@code null}
     * @return {@code true} if the tag contains one of the stop-word fragments,
     *         {@code false} otherwise (including for {@code null})
     */
    public static boolean isStopword(String pos) {
        // A missing tag is treated as "not a stop word" instead of throwing.
        if (pos == null) {
            return false;
        }
        return stopwordPattern.matcher(pos).find();
    }
}
--------------------------------------------------------------------------------
/fnlp-core/src/main/java/org/fnlp/nlp/cn/anaphora/ARInstanceGetter.java:
--------------------------------------------------------------------------------
1 | /**
2 | * This file is part of FNLP (formerly FudanNLP).
3 | *
4 | * FNLP is free software: you can redistribute it and/or modify
5 | * it under the terms of the GNU Lesser General Public License as published by
6 | * the Free Software Foundation, either version 3 of the License, or
7 | * (at your option) any later version.
8 | *
9 | * FNLP is distributed in the hope that it will be useful,
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | * GNU Lesser General Public License for more details.
13 | *
14 | * You should have received a copy of the GNU General Public License
15 | * along with FudanNLP. If not, see <http://www.gnu.org/licenses/>.
16 | *
17 | * Copyright 2009-2014 www.fnlp.org. All rights reserved.
18 | */
19 |
20 | package org.fnlp.nlp.cn.anaphora;
21 |
22 | import org.fnlp.ml.types.Instance;
23 |
24 | /**
25 | * 获得指代消解的样本
26 | * @author jszhao
27 | * @version 1.0
28 | * @since FudanNLP 1.5
29 | */
30 |
31 | public class ARInstanceGetter {
32 |
33 | private Instance instance;
34 | public ARInstanceGetter(FeatureGeter fBuilder){
35 | this.instance = new Instance(fBuilder.getFeature(),
36 | fBuilder.getInst().getTarget());
37 | this.instance.setSource(fBuilder.getInst().getData());
38 | }
39 |
40 | public Instance getInstance(){
41 | return this.instance;
42 | }
43 |
44 |
45 | }
--------------------------------------------------------------------------------
/fnlp-core/src/main/java/org/fnlp/nlp/cn/anaphora/package-info.java:
--------------------------------------------------------------------------------
1 | /**
2 | * 指代消解包.
3 | *
This file is part of FudanNLP.
4 |
5 | *
FudanNLP is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU Lesser General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 |
10 | *
FudanNLP is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU Lesser General Public License for more details.
14 |
15 | *
You should have received a copy of the GNU General Public License
16 | * along with FudanNLP. If not, see
17 | * http://www.gnu.org/licenses/.
18 |
19 | *
Copyright 2009-2012 fnlp.org. All rights reserved.
20 | *
21 | * @author fnlp.org
22 | * @since FudanNLP 1.5
23 | * @version 1.0.0
24 | *
25 | */
26 | package org.fnlp.nlp.cn.anaphora;
--------------------------------------------------------------------------------
/fnlp-core/src/main/java/org/fnlp/nlp/cn/anaphora/train/FileGroup.java:
--------------------------------------------------------------------------------
1 | /**
2 | * This file is part of FNLP (formerly FudanNLP).
3 | *
4 | * FNLP is free software: you can redistribute it and/or modify
5 | * it under the terms of the GNU Lesser General Public License as published by
6 | * the Free Software Foundation, either version 3 of the License, or
7 | * (at your option) any later version.
8 | *
9 | * FNLP is distributed in the hope that it will be useful,
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | * GNU Lesser General Public License for more details.
13 | *
14 | * You should have received a copy of the GNU General Public License
15 | * along with FudanNLP. If not, see <http://www.gnu.org/licenses/>.
16 | *
17 | * Copyright 2009-2014 www.fnlp.org. All rights reserved.
18 | */
19 |
20 | package org.fnlp.nlp.cn.anaphora.train;
21 |
22 | import java.io.File;
23 | /**
24 | * 文件组合,包括原文件和标记好的文件
25 | * @author jszhao
26 | * @version 1.0
27 | * @since FudanNLP 1.5
28 | */
29 | public class FileGroup {
30 | private File orgFile; // 原文件
31 | private File markFile; //标记文件
32 |
33 | public FileGroup(File orgFile,File markFile){
34 | this.orgFile= orgFile;
35 | this.markFile = markFile;
36 |
37 | }
38 |
39 | public File getOrgFile(){
40 | return orgFile;
41 | }
42 | public File getMarkFile(){
43 | return markFile;
44 | }
45 | public void setOrgFile(File orgFile){
46 | this.orgFile = orgFile;
47 | }
48 | public void setMarkFile(File markFile){
49 | this.markFile = markFile;
50 | }
51 | }
--------------------------------------------------------------------------------
/fnlp-core/src/main/java/org/fnlp/nlp/cn/anaphora/train/package-info.java:
--------------------------------------------------------------------------------
1 |
2 | /**
3 | * @author xpqiu
4 | *
5 | */
6 | package org.fnlp.nlp.cn.anaphora.train;
7 |
8 | /**
9 | * 训练步骤:
10 | * 1、通过DocFilter.java过滤掉不含有第三人称代词和指示代词的文件;
11 | * 2、通过MyDocumentWriter.java生成特征训练文件;
12 | * 3、通过ARClassifier.java对生成的特征训练文件进行训练,生成训练模型。
13 | */
--------------------------------------------------------------------------------
/fnlp-core/src/main/java/org/fnlp/nlp/cn/ner/ne/package-info.java:
--------------------------------------------------------------------------------
1 | /**
2 | *
3 | */
4 | /**
5 | * @author Xipeng
6 | *
7 | */
8 | package org.fnlp.nlp.cn.ner.ne;
--------------------------------------------------------------------------------
/fnlp-core/src/main/java/org/fnlp/nlp/cn/ner/package-info.java:
--------------------------------------------------------------------------------
1 | /**
2 | * 专有实体名识别包,使用特定方法进行简单的实体名识别.
3 | *
This file is part of FudanNLP.
4 |
5 | *
FudanNLP is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU Lesser General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 |
10 | *
FudanNLP is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU Lesser General Public License for more details.
14 |
15 | *
You should have received a copy of the GNU General Public License
16 | * along with FudanNLP. If not, see
17 | * http://www.gnu.org/licenses/.
18 |
19 | *
FudanNLP is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU Lesser General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 |
10 | *
FudanNLP is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU Lesser General Public License for more details.
14 |
15 | *
You should have received a copy of the GNU General Public License
16 | * along with FudanNLP. If not, see
17 | * http://www.gnu.org/licenses/.
18 |
19 | *
Copyright 2009-2012 fnlp.org. All rights reserved.
20 | *
21 | * @author fnlp.org
22 | * @since FudanNLP 1.5
23 | * @version 1.0.0
24 | *
25 | */
26 | package org.fnlp.nlp.cn;
--------------------------------------------------------------------------------
/fnlp-core/src/main/java/org/fnlp/nlp/cn/tag/format/BasicFormatter.java:
--------------------------------------------------------------------------------
1 | /**
2 | * This file is part of FNLP (formerly FudanNLP).
3 | *
4 | * FNLP is free software: you can redistribute it and/or modify
5 | * it under the terms of the GNU Lesser General Public License as published by
6 | * the Free Software Foundation, either version 3 of the License, or
7 | * (at your option) any later version.
8 | *
9 | * FNLP is distributed in the hope that it will be useful,
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | * GNU Lesser General Public License for more details.
13 | *
14 | * You should have received a copy of the GNU General Public License
15 | * along with FudanNLP. If not, see <http://www.gnu.org/licenses/>.
16 | *
17 | * Copyright 2009-2014 www.fnlp.org. All rights reserved.
18 | */
19 |
20 | package org.fnlp.nlp.cn.tag.format;
21 |
22 | import java.util.List;
23 |
24 | import org.fnlp.ml.types.Instance;
25 | import org.fnlp.ml.types.InstanceSet;
26 | /**
27 | *
28 | * @author xpqiu
29 | *
30 | */
31 | public class BasicFormatter {
32 | public static String format(InstanceSet testSet, String[][] labelsSet) {
33 | StringBuilder sb = new StringBuilder();
34 | for (int i = 0; i < testSet.size(); i++) {
35 | Instance inst = testSet.getInstance(i);
36 | String[] labels = labelsSet[i];
37 | sb.append(format(inst, labels));
38 | sb.append("\n");
39 | }
40 | return sb.toString();
41 | }
42 |
43 | public static String format(Instance inst, String[] labels) {
44 |
45 | StringBuilder sb = new StringBuilder();
46 | List data = (List) inst.getSource();
47 |
48 | for (int j = 0; j < data.size(); j++) {
49 | sb.append(data.get(j));
50 | sb.append('\t');
51 | sb.append(labels[j]);
52 | sb.append("\n");
53 | }
54 | return sb.toString();
55 | }
56 | }
--------------------------------------------------------------------------------
/fnlp-core/src/main/java/org/fnlp/nlp/cn/tag/format/package-info.java:
--------------------------------------------------------------------------------
1 | /**
2 | * 序列标注结果格式化包.
3 | *
This file is part of FudanNLP.
4 |
5 | *
FudanNLP is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU Lesser General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 |
10 | *
FudanNLP is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU Lesser General Public License for more details.
14 |
15 | *
You should have received a copy of the GNU General Public License
16 | * along with FudanNLP. If not, see
17 | * http://www.gnu.org/licenses/.
18 |
19 | *
FudanNLP is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU Lesser General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 |
10 | *
FudanNLP is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU Lesser General Public License for more details.
14 |
15 | *
You should have received a copy of the GNU General Public License
16 | * along with FudanNLP. If not, see
17 | * http://www.gnu.org/licenses/.
18 |
19 | *
Copyright 2009-2012 fnlp.org. All rights reserved.
20 | *
21 | * @author fnlp.org
22 | * @since FudanNLP 1.5
23 | * @version 1.0.0
24 | *
25 | */
26 | package org.fnlp.nlp.cn.tag;
--------------------------------------------------------------------------------
/fnlp-core/src/main/java/org/fnlp/nlp/corpus/ctbconvert/FCTB2CONLL.java:
--------------------------------------------------------------------------------
1 | /**
2 | * This file is part of FNLP (formerly FudanNLP).
3 | *
4 | * FNLP is free software: you can redistribute it and/or modify
5 | * it under the terms of the GNU Lesser General Public License as published by
6 | * the Free Software Foundation, either version 3 of the License, or
7 | * (at your option) any later version.
8 | *
9 | * FNLP is distributed in the hope that it will be useful,
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | * GNU Lesser General Public License for more details.
13 | *
14 | * You should have received a copy of the GNU General Public License
15 | * along with FudanNLP. If not, see <http://www.gnu.org/licenses/>.
16 | *
17 | * Copyright 2009-2014 www.fnlp.org. All rights reserved.
18 | */
19 |
20 | package org.fnlp.nlp.corpus.ctbconvert;
21 |
22 | import java.io.IOException;
23 | import java.nio.charset.Charset;
24 |
25 | import org.fnlp.ml.types.InstanceSet;
26 | /**
27 | * CTB转为FNLP格式
28 | * @author Xipeng
29 | *
30 | */
31 | public class FCTB2CONLL {
32 |
33 | public static void main(String[] args) throws IOException{
34 | DependentTreeProducter rp = new DependentTreeProducter();
35 | InstanceSet ins = MyTreebankReader.readTrees("../data/ctb/data", null,Charset.forName("UTF8"));
36 | // InstanceSet ins = MyTreebankReader.readNewTrees("./data/ctb/data", null,Charset.forName("UTF8"));
37 |
38 | rp.write(ins, "../data/ctb/result.txt", "../data/headrules.txt");
39 | System.out.print("Done!");
40 | }
41 | }
--------------------------------------------------------------------------------
/fnlp-core/src/main/java/org/fnlp/nlp/corpus/ctbconvert/FCTB2CONLLTest.java:
--------------------------------------------------------------------------------
1 | /**
2 | * This file is part of FNLP (formerly FudanNLP).
3 | *
4 | * FNLP is free software: you can redistribute it and/or modify
5 | * it under the terms of the GNU Lesser General Public License as published by
6 | * the Free Software Foundation, either version 3 of the License, or
7 | * (at your option) any later version.
8 | *
9 | * FNLP is distributed in the hope that it will be useful,
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | * GNU Lesser General Public License for more details.
13 | *
14 | * You should have received a copy of the GNU General Public License
15 | * along with FudanNLP. If not, see <http://www.gnu.org/licenses/>.
16 | *
17 | * Copyright 2009-2014 www.fnlp.org. All rights reserved.
18 | */
19 |
20 | package org.fnlp.nlp.corpus.ctbconvert;
21 |
22 | import java.io.IOException;
23 | import java.nio.charset.Charset;
24 |
25 | import org.fnlp.ml.types.InstanceSet;
26 | /**
27 | * CTB转为FNLP格式
28 | * @author Xipeng
29 | *
30 | */
31 | public class FCTB2CONLLTest {
32 |
33 | public static void main(String[] args) throws IOException{
34 | DependentTreeProducter rp = new DependentTreeProducter();
35 | rp.debug = true;
36 | InstanceSet ins = MyTreebankReader.readTrees("./data/ctb/test.txt", null,Charset.forName("UTF8"));
37 |
38 | rp.write(ins, "./data/ctb/result.txt", "./data/headrules.txt");
39 | System.out.print("Done!");
40 | }
41 | }
--------------------------------------------------------------------------------
/fnlp-core/src/main/java/org/fnlp/nlp/corpus/ctbconvert/package-info.java:
--------------------------------------------------------------------------------
1 | /**
2 | * CTB树库转换
3 | */
4 | package org.fnlp.nlp.corpus.ctbconvert;
--------------------------------------------------------------------------------
/fnlp-core/src/main/java/org/fnlp/nlp/corpus/fnlp/filter/package-info.java:
--------------------------------------------------------------------------------
1 | /**
2 | *
3 | */
4 | /**
5 | * @author Xipeng
6 | *
7 | */
8 | package org.fnlp.nlp.corpus.fnlp.filter;
--------------------------------------------------------------------------------
/fnlp-core/src/main/java/org/fnlp/nlp/corpus/fnlp/package-info.java:
--------------------------------------------------------------------------------
1 | /**
2 | * FNLP数据内部格式
3 | * @author Xipeng
4 | *
5 | */
6 | package org.fnlp.nlp.corpus.fnlp;
--------------------------------------------------------------------------------
/fnlp-core/src/main/java/org/fnlp/nlp/corpus/package.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Insert title here
6 |
7 |
8 |
自然语言处理语料处理包。
9 |
This file is part of FudanNLP.
10 |
11 |
FudanNLP is free software: you can redistribute it and/or modify
12 | it under the terms of the GNU Lesser General Public License as published by
13 | the Free Software Foundation, either version 3 of the License, or
14 | (at your option) any later version.
15 |
16 |
FudanNLP is distributed in the hope that it will be useful,
17 | but WITHOUT ANY WARRANTY; without even the implied warranty of
18 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 | GNU Lesser General Public License for more details.
20 |
21 |
You should have received a copy of the GNU General Public License
22 | along with FudanNLP. If not, see
23 | http://www.gnu.org/licenses/.
24 |
25 |
Copyright 2009-2012 fnlp.org. All rights reserved.
26 |
27 |
28 |
--------------------------------------------------------------------------------
/fnlp-core/src/main/java/org/fnlp/nlp/corpus/third/package-info.java:
--------------------------------------------------------------------------------
1 | /**
2 | * 第三方数据处理
3 | */
4 | package org.fnlp.nlp.corpus.third;
--------------------------------------------------------------------------------
/fnlp-core/src/main/java/org/fnlp/nlp/duplicate/DocSim.java:
--------------------------------------------------------------------------------
1 | /**
2 | * This file is part of FNLP (formerly FudanNLP).
3 | *
4 | * FNLP is free software: you can redistribute it and/or modify
5 | * it under the terms of the GNU Lesser General Public License as published by
6 | * the Free Software Foundation, either version 3 of the License, or
7 | * (at your option) any later version.
8 | *
9 | * FNLP is distributed in the hope that it will be useful,
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | * GNU Lesser General Public License for more details.
13 | *
14 | * You should have received a copy of the GNU General Public License
15 | * along with FudanNLP. If not, see <http://www.gnu.org/licenses/>.
16 | *
17 | * Copyright 2009-2014 www.fnlp.org. All rights reserved.
18 | */
19 |
20 | package org.fnlp.nlp.duplicate;
21 |
22 | import java.util.ArrayList;
23 |
24 |
25 |
/**
 * Group of document ids, ordered so that larger groups sort first.
 *
 * <p>The ordering is deliberately NOT consistent with {@code equals}: two
 * distinct groups of the same size never compare as equal, so a sorted set
 * keeps both of them instead of silently dropping one.
 */
public class DocSim implements Comparable<DocSim> {

    /** Ids belonging to this group. NOTE(review): element type is not visible here - confirm and parameterize. */
    public ArrayList ids;

    /**
     * @param ids ids belonging to this group (stored without copying)
     */
    public DocSim(ArrayList ids) {
        this.ids = ids;
    }

    /**
     * Orders groups by descending size.
     *
     * @param ds group to compare with
     * @return 0 when {@code ds} is this very object (required so that
     *         {@code x.compareTo(x) == 0}); 1 when this group is smaller than
     *         {@code ds}; -1 otherwise, including distinct groups of equal size
     */
    @Override
    public int compareTo(DocSim ds) {
        if (this == ds) {
            return 0;
        }
        return ids.size() < ds.ids.size() ? 1 : -1;
    }
}
--------------------------------------------------------------------------------
/fnlp-core/src/main/java/org/fnlp/nlp/duplicate/Documents.java:
--------------------------------------------------------------------------------
1 | /**
2 | * This file is part of FNLP (formerly FudanNLP).
3 | *
4 | * FNLP is free software: you can redistribute it and/or modify
5 | * it under the terms of the GNU Lesser General Public License as published by
6 | * the Free Software Foundation, either version 3 of the License, or
7 | * (at your option) any later version.
8 | *
9 | * FNLP is distributed in the hope that it will be useful,
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | * GNU Lesser General Public License for more details.
13 | *
14 | * You should have received a copy of the GNU General Public License
15 | * along with FudanNLP. If not, see <http://www.gnu.org/licenses/>.
16 | *
17 | * Copyright 2009-2014 www.fnlp.org. All rights reserved.
18 | */
19 |
20 | package org.fnlp.nlp.duplicate;
21 |
/**
 * Minimal holder for the text of one document.
 */
public class Documents {

    /** Document text; stays {@code null} when the no-argument constructor is used. */
    public String content;

    /** Creates a holder without content. */
    public Documents() {
    }

    /**
     * Creates a holder for the given text.
     *
     * @param ss document text
     */
    public Documents(String ss) {
        this.content = ss;
    }
}
--------------------------------------------------------------------------------
/fnlp-core/src/main/java/org/fnlp/nlp/duplicate/ISimilarity.java:
--------------------------------------------------------------------------------
1 | /**
2 | * This file is part of FNLP (formerly FudanNLP).
3 | *
4 | * FNLP is free software: you can redistribute it and/or modify
5 | * it under the terms of the GNU Lesser General Public License as published by
6 | * the Free Software Foundation, either version 3 of the License, or
7 | * (at your option) any later version.
8 | *
9 | * FNLP is distributed in the hope that it will be useful,
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | * GNU Lesser General Public License for more details.
13 | *
14 | * You should have received a copy of the GNU General Public License
15 | * along with FudanNLP. If not, see <http://www.gnu.org/licenses/>.
16 | *
17 | * Copyright 2009-2014 www.fnlp.org. All rights reserved.
18 | */
19 |
20 | package org.fnlp.nlp.duplicate;
21 |
22 | import java.util.ArrayList;
23 | import java.util.TreeSet;
24 |
25 |
/**
 * Contract for duplicate detection over a collection of documents.
 */
public interface ISimilarity {

    /**
     * Runs duplicate detection on the given documents.
     * NOTE(review): element types of the argument and of the result are not
     * visible in this declaration - presumably documents in, groups of
     * similar documents out; confirm against the implementations.
     *
     * @param docs documents to examine
     * @return a sorted set with the detection result
     * @throws Exception if the implementation fails
     */
    public abstract TreeSet duplicate(ArrayList docs) throws Exception;

}
--------------------------------------------------------------------------------
/fnlp-core/src/main/java/org/fnlp/nlp/langmodel/NGramModelTest.java:
--------------------------------------------------------------------------------
1 | /**
2 | * This file is part of FNLP (formerly FudanNLP).
3 | *
4 | * FNLP is free software: you can redistribute it and/or modify
5 | * it under the terms of the GNU Lesser General Public License as published by
6 | * the Free Software Foundation, either version 3 of the License, or
7 | * (at your option) any later version.
8 | *
9 | * FNLP is distributed in the hope that it will be useful,
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | * GNU Lesser General Public License for more details.
13 | *
14 | * You should have received a copy of the GNU General Public License
15 | * along with FudanNLP. If not, see <http://www.gnu.org/licenses/>.
16 | *
17 | * Copyright 2009-2014 www.fnlp.org. All rights reserved.
18 | */
19 |
20 | package org.fnlp.nlp.langmodel;
21 |
22 | import java.io.IOException;
23 |
24 | public class NGramModelTest{
25 |
26 |
27 | public static void main(String[] args) throws Exception {
28 | //
29 | String segfile_mini = "../tmp/wiki_mini_simp_seg";
30 |
31 | NGramModel model = new NGramModel(2);
32 | model.build(segfile_mini);
33 |
34 | // System.out.println("perplexity:" + model.computePerplexity("tmp/poi.dic"));
35 | System.out.println(model.getProbability("利用 符号"));
36 | }
37 |
38 |
39 | }
--------------------------------------------------------------------------------
/fnlp-core/src/main/java/org/fnlp/nlp/langmodel/lda/package-info.java:
--------------------------------------------------------------------------------
1 | /**
2 | *
3 | */
4 | /**
5 | * @author Xipeng
6 | *
7 | */
8 | package org.fnlp.nlp.langmodel.lda;
--------------------------------------------------------------------------------
/fnlp-core/src/main/java/org/fnlp/nlp/langmodel/package-info.java:
--------------------------------------------------------------------------------
1 | /**
2 | *
3 | */
4 | /**
5 | * @author xpqiu
6 | *
7 | */
8 | package org.fnlp.nlp.langmodel;
--------------------------------------------------------------------------------
/fnlp-core/src/main/java/org/fnlp/nlp/parser/dep/analysis/AnalysisSentence.java:
--------------------------------------------------------------------------------
1 | package org.fnlp.nlp.parser.dep.analysis;
2 |
/**
 * Plain data holder for one sentence used in dependency-parse analysis.
 * NOTE(review): field meanings below are read off the names (gold vs.
 * predicted heads and relations) - confirm against the code that fills them.
 */
public class AnalysisSentence {

    /** Word forms of the sentence. */
    public String[] forms;
    /** Tag of each word. */
    public String[] tags;
    /** Gold head index of each word. */
    public int[] goldhead;
    /** Gold relation label of each word. */
    public String[] goldrel;
    /** Predicted head index of each word. */
    public int[] predhead;
    /** Predicted relation label of each word. */
    public String[] predrel;

    /**
     * Stores the given arrays without copying them.
     *
     * @param forms    word forms
     * @param tags     tags
     * @param goldhead gold head indices
     * @param goldrel  gold relation labels
     * @param predhead predicted head indices
     * @param predrel  predicted relation labels
     */
    public AnalysisSentence(String[] forms, String[] tags, int[] goldhead,
            String[] goldrel, int[] predhead, String[] predrel) {
        this.predrel = predrel;
        this.predhead = predhead;
        this.goldrel = goldrel;
        this.goldhead = goldhead;
        this.tags = tags;
        this.forms = forms;
    }

    /**
     * @return the number of word forms
     */
    public int length() {
        int count = forms.length;
        return count;
    }
}
25 |
--------------------------------------------------------------------------------
/fnlp-core/src/main/java/org/fnlp/nlp/parser/dep/package-info.java:
--------------------------------------------------------------------------------
1 | /**
2 | * 依存句法分析包.
3 | *
This file is part of FudanNLP.
4 |
5 | *
FudanNLP is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU Lesser General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 |
10 | *
FudanNLP is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU Lesser General Public License for more details.
14 |
15 | *
You should have received a copy of the GNU General Public License
16 | * along with FudanNLP. If not, see
17 | * http://www.gnu.org/licenses/.
18 |
19 | *
FudanNLP is free software: you can redistribute it and/or modify
6 | * it under the terms of the GNU Lesser General Public License as published by
7 | * the Free Software Foundation, either version 3 of the License, or
8 | * (at your option) any later version.
9 |
10 | *
FudanNLP is distributed in the hope that it will be useful,
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | * GNU Lesser General Public License for more details.
14 |
15 | *
You should have received a copy of the GNU General Public License
16 | * along with FudanNLP. If not, see
17 | * http://www.gnu.org/licenses/.
18 |
19 | *
Copyright 2009-2012 fnlp.org. All rights reserved.
20 | *
21 | * @author fnlp.org
22 | * @since FudanNLP 1.5
23 | * @version 1.0.0
24 | *
25 | */
26 |
27 | package org.fnlp.nlp.parser;
--------------------------------------------------------------------------------
/fnlp-core/src/main/java/org/fnlp/nlp/pipe/Normalize.java:
--------------------------------------------------------------------------------
1 | /**
2 | * This file is part of FNLP (formerly FudanNLP).
3 | *
4 | * FNLP is free software: you can redistribute it and/or modify
5 | * it under the terms of the GNU Lesser General Public License as published by
6 | * the Free Software Foundation, either version 3 of the License, or
7 | * (at your option) any later version.
8 | *
9 | * FNLP is distributed in the hope that it will be useful,
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | * GNU Lesser General Public License for more details.
13 | *
14 | * You should have received a copy of the GNU General Public License
15 | * along with FudanNLP. If not, see <http://www.gnu.org/licenses/>.
16 | *
17 | * Copyright 2009-2014 www.fnlp.org. All rights reserved.
18 | */
19 |
20 | package org.fnlp.nlp.pipe;
21 |
22 | import java.io.Serializable;
23 |
24 | import org.fnlp.ml.types.Instance;
25 | import org.fnlp.ml.types.sv.SparseVector;
26 |
27 | /**
28 | * 归一化,data类型须为SparseVector
29 | * @author xpqiu
30 | *
31 | */
32 | public class Normalize extends Pipe implements Serializable {
33 |
34 | private static final long serialVersionUID = -4740915822925015609L;
35 |
36 | @Override
37 | public void addThruPipe(Instance instance) {
38 | SparseVector data = (SparseVector) instance.getData();
39 | data.normalize();
40 | }
41 |
42 | }
--------------------------------------------------------------------------------
/fnlp-core/src/main/java/org/fnlp/nlp/pipe/WeightPipe.java:
--------------------------------------------------------------------------------
1 | /**
2 | * This file is part of FNLP (formerly FudanNLP).
3 | *
4 | * FNLP is free software: you can redistribute it and/or modify
5 | * it under the terms of the GNU Lesser General Public License as published by
6 | * the Free Software Foundation, either version 3 of the License, or
7 | * (at your option) any later version.
8 | *
9 | * FNLP is distributed in the hope that it will be useful,
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | * GNU Lesser General Public License for more details.
13 | *
14 | * You should have received a copy of the GNU General Public License
15 | * along with FudanNLP. If not, see <http://www.gnu.org/licenses/>.
16 | *
17 | * Copyright 2009-2014 www.fnlp.org. All rights reserved.
18 | */
19 |
20 | package org.fnlp.nlp.pipe;
21 |
22 | import org.fnlp.ml.types.Instance;
23 |
24 | public class WeightPipe extends Pipe {
25 |
26 | private static final long serialVersionUID = 1L;
27 | private static float[] weight = {};
28 |
29 | public WeightPipe(boolean b){
30 | if(b){
31 | weight = new float[10];
32 | int i=0;
33 | for(;i<5;i++){
34 | weight[i] = 2f;
35 | }
36 | for(;i<10;i++){
37 | weight[i] = 1.5f;
38 | }
39 | }
40 | }
41 |
42 | @Override
43 | public void addThruPipe(Instance inst) throws Exception {
44 |
45 | Object sdata = inst.getData();
46 | int len;
47 | if(sdata instanceof int[][]){//转换后的特征
48 | int[][] data = (int[][]) sdata;
49 | len = data.length;
50 | }else{
51 | System.err.println("WeightPipe: Error");
52 | return;
53 | }
54 |
55 | float w;
56 | if(len.
16 | *
17 | * Copyright 2009-2014 www.fnlp.org. All rights reserved.
18 | */
19 |
20 | package org.fnlp.nlp.pipe.nlp;
21 |
22 | import org.fnlp.ml.types.Instance;
23 | import org.fnlp.nlp.cn.tag.CWSTagger;
24 | import org.fnlp.nlp.pipe.Pipe;
25 |
26 | /**
27 |  * Pipe that performs word segmentation (and related processing) on an instance.
28 |  * @author xpqiu
29 |  *
30 |  */
31 | public class CNPipe extends Pipe{
32 |     private static final long serialVersionUID = -2329969202592736092L;
33 |     /** Segmenter used by addThruPipe; declared transient, so it is not serialized with the pipe. */
34 |     private transient CWSTagger seg;
35 |     /** Creates a pipe without a segmenter; addThruPipe throws until a segmenter is present. */
36 |     public CNPipe() {
37 |     }
38 |     /** @param seg segmenter whose tag2Array output replaces the instance data */
39 |     public CNPipe(CWSTagger seg) {
40 |         this.seg = seg;
41 |     }
42 |     /** Replaces the String data of inst with seg.tag2Array(data); throws NullPointerException when no segmenter is set. */
43 |     @Override
44 |     public void addThruPipe(Instance inst) {
45 |         String data = (String) inst.getData();
46 |         // Same exception type as before, but with a message that names the cause.
47 |         if (seg == null) { throw new NullPointerException("CNPipe: CWSTagger is not set"); }
48 |         inst.setData(seg.tag2Array(data));
49 |     }
50 | }
--------------------------------------------------------------------------------
/fnlp-core/src/main/java/org/fnlp/nlp/pipe/nlp/package-info.java:
--------------------------------------------------------------------------------
1 | /**
2 | * 使用了NLP处理工具的数据特征转换器。
3 | *
4 | *
This file is part of FudanNLP.
5 |
6 | *
FudanNLP is free software: you can redistribute it and/or modify
7 | * it under the terms of the GNU Lesser General Public License as published by
8 | * the Free Software Foundation, either version 3 of the License, or
9 | * (at your option) any later version.
10 | *
11 | *
FudanNLP is distributed in the hope that it will be useful,
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | * GNU Lesser General Public License for more details.
15 | *
16 | *
You should have received a copy of the GNU General Public License
17 | * along with FudanNLP. If not, see
18 | * http://www.gnu.org/licenses/.
19 | *
20 | *
FudanNLP is free software: you can redistribute it and/or modify
13 | it under the terms of the GNU Lesser General Public License as published by
14 | the Free Software Foundation, either version 3 of the License, or
15 | (at your option) any later version.
16 |
17 |
FudanNLP is distributed in the hope that it will be useful,
18 | but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 | GNU Lesser General Public License for more details.
21 |
22 |
You should have received a copy of the GNU General Public License
23 | along with FudanNLP. If not, see
24 | http://www.gnu.org/licenses/.
25 |
26 |
Copyright 2009-2012 fnlp.org. All rights reserved.
27 |
28 |
29 |
--------------------------------------------------------------------------------
/fnlp-core/src/main/java/org/fnlp/nlp/pipe/seq/MixedString2Sequence.java:
--------------------------------------------------------------------------------
1 | /**
2 | * This file is part of FNLP (formerly FudanNLP).
3 | *
4 | * FNLP is free software: you can redistribute it and/or modify
5 | * it under the terms of the GNU Lesser General Public License as published by
6 | * the Free Software Foundation, either version 3 of the License, or
7 | * (at your option) any later version.
8 | *
9 | * FNLP is distributed in the hope that it will be useful,
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | * GNU Lesser General Public License for more details.
13 | *
14 | * You should have received a copy of the GNU General Public License
15 | * along with FudanNLP. If not, see http://www.gnu.org/licenses/.
16 | *
17 | * Copyright 2009-2014 www.fnlp.org. All rights reserved.
18 | */
19 |
20 | package org.fnlp.nlp.pipe.seq;
21 |
22 | import org.fnlp.ml.types.Instance;
23 | import org.fnlp.nlp.cn.Chars;
24 | import org.fnlp.nlp.pipe.Pipe;
25 |
26 | /**
27 |  * Processes strings that mix several languages.
28 |  * @author Feng Ji
29 |  *
30 |  */
31 | public class MixedString2Sequence extends Pipe {
32 |
33 |     // NOTE(review): the space-separated text built below is never written back to the instance - confirm intended.
34 |     @Override
35 |     public void addThruPipe(Instance inst) throws Exception {
36 |         String text = (String) inst.getData();
37 |         StringBuilder spaced = new StringBuilder();
38 |         for (char c : text.toCharArray()) {
39 |             if (!Chars.isChar(c)) {
40 |                 continue; // only characters accepted by Chars.isChar are kept
41 |             }
42 |             spaced.append(c).append(" ");
43 |         }
44 |     }
45 |
46 | }
--------------------------------------------------------------------------------
/fnlp-core/src/main/java/org/fnlp/nlp/pipe/seq/package.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Insert title here
6 |
7 |
8 |
数据特征转换器,针对序列标注数据。
9 |
10 |
This file is part of FudanNLP.
11 |
12 |
FudanNLP is free software: you can redistribute it and/or modify
13 | it under the terms of the GNU Lesser General Public License as published by
14 | the Free Software Foundation, either version 3 of the License, or
15 | (at your option) any later version.
16 |
17 |
FudanNLP is distributed in the hope that it will be useful,
18 | but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 | GNU Lesser General Public License for more details.
21 |
22 |
You should have received a copy of the GNU General Public License
23 | along with FudanNLP. If not, see
24 | http://www.gnu.org/licenses/.
25 |
26 |
Copyright 2009-2012 fnlp.org. All rights reserved.
27 |
28 |
29 |
--------------------------------------------------------------------------------
/fnlp-core/src/main/java/org/fnlp/nlp/pipe/seq/templet/Templet.java:
--------------------------------------------------------------------------------
1 | /**
2 | * This file is part of FNLP (formerly FudanNLP).
3 | *
4 | * FNLP is free software: you can redistribute it and/or modify
5 | * it under the terms of the GNU Lesser General Public License as published by
6 | * the Free Software Foundation, either version 3 of the License, or
7 | * (at your option) any later version.
8 | *
9 | * FNLP is distributed in the hope that it will be useful,
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | * GNU Lesser General Public License for more details.
13 | *
14 | * You should have received a copy of the GNU General Public License
15 | * along with FudanNLP. If not, see http://www.gnu.org/licenses/.
16 | *
17 | * Copyright 2009-2014 www.fnlp.org. All rights reserved.
18 | */
19 |
20 | package org.fnlp.nlp.pipe.seq.templet;
21 |
22 | import java.io.Serializable;
23 |
24 | import org.fnlp.ml.types.Instance;
25 | import org.fnlp.ml.types.alphabet.IFeatureAlphabet;
26 | /**
27 |  * Template interface.
28 |  * @author xpqiu
29 |  *
30 |  */
31 | public interface Templet extends Serializable{
32 |
33 |     /**
34 |      * Returns the order of this template.
35 |      * @return the order
36 |      */
37 |     int getOrder();
38 |
39 |     /**
40 |      * Extracts features at the given position of the given instance.
41 |      * @param instance the given instance
42 |      * @param features feature alphabet handed to the template
43 |      * @param pos the given position
44 |      * @param numLabels number of labels
45 |      * @return an int whose meaning the original leaves undocumented (NOTE(review): confirm with implementations)
46 |      * @throws Exception declared by the interface; the conditions depend on the implementation
47 |      */
48 |     int generateAt(Instance instance, IFeatureAlphabet features,
49 |             int pos, int... numLabels) throws Exception;
50 |     /** NOTE(review): undocumented in the original; meaning of the returned array must be confirmed with implementations. */
51 |     int[] getVars();
52 |
53 | }
--------------------------------------------------------------------------------
/fnlp-core/src/main/java/org/fnlp/nlp/pipe/seq/templet/package.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Insert title here
6 |
7 |
8 |
特征生成。
9 |
10 |
This file is part of FudanNLP.
11 |
12 |
FudanNLP is free software: you can redistribute it and/or modify
13 | it under the terms of the GNU Lesser General Public License as published by
14 | the Free Software Foundation, either version 3 of the License, or
15 | (at your option) any later version.
16 |
17 |
FudanNLP is distributed in the hope that it will be useful,
18 | but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 | GNU Lesser General Public License for more details.
21 |
22 |
You should have received a copy of the GNU General Public License
23 | along with FudanNLP. If not, see
24 | http://www.gnu.org/licenses/.
25 |
26 |
Copyright 2009-2012 fnlp.org. All rights reserved.
27 |
28 |
29 |
--------------------------------------------------------------------------------
/fnlp-core/src/main/java/org/fnlp/nlp/pipe/templet/Templet.java:
--------------------------------------------------------------------------------
1 | /**
2 | * This file is part of FNLP (formerly FudanNLP).
3 | *
4 | * FNLP is free software: you can redistribute it and/or modify
5 | * it under the terms of the GNU Lesser General Public License as published by
6 | * the Free Software Foundation, either version 3 of the License, or
7 | * (at your option) any later version.
8 | *
9 | * FNLP is distributed in the hope that it will be useful,
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | * GNU Lesser General Public License for more details.
13 | *
14 | * You should have received a copy of the GNU General Public License
15 | * along with FudanNLP. If not, see http://www.gnu.org/licenses/.
16 | *
17 | * Copyright 2009-2014 www.fnlp.org. All rights reserved.
18 | */
19 |
20 | package org.fnlp.nlp.pipe.templet;
21 |
22 | import java.io.Serializable;
23 |
24 | import org.fnlp.ml.types.Instance;
25 | import org.fnlp.ml.types.alphabet.IFeatureAlphabet;
26 | /**
27 |  * Template interface.
28 |  * @author xpqiu
29 |  *
30 |  */
31 | public interface Templet extends Serializable{
32 |
33 |     /**
34 |      * Extracts features from the given instance.
35 |      * @param instance the given instance
36 |      * @param features feature alphabet handed to the template
37 |      * @param numLabels number of labels
38 |      * @return an int array whose meaning the original leaves undocumented (NOTE(review): confirm with implementations)
39 |      * @throws Exception declared by the interface; the conditions depend on the implementation
40 |      */
41 |     int[] generateAt(Instance instance, IFeatureAlphabet features,
42 |             int numLabels) throws Exception;
43 | }
--------------------------------------------------------------------------------
/fnlp-core/src/main/java/org/fnlp/nlp/pipe/templet/TempletGroup.java:
--------------------------------------------------------------------------------
1 | /**
2 | * This file is part of FNLP (formerly FudanNLP).
3 | *
4 | * FNLP is free software: you can redistribute it and/or modify
5 | * it under the terms of the GNU Lesser General Public License as published by
6 | * the Free Software Foundation, either version 3 of the License, or
7 | * (at your option) any later version.
8 | *
9 | * FNLP is distributed in the hope that it will be useful,
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | * GNU Lesser General Public License for more details.
13 | *
14 | * You should have received a copy of the GNU General Public License
15 | * along with FudanNLP. If not, see http://www.gnu.org/licenses/.
16 | *
17 | * Copyright 2009-2014 www.fnlp.org. All rights reserved.
18 | */
19 |
20 | package org.fnlp.nlp.pipe.templet;
21 |
22 | import java.util.ArrayList;
23 | /**
24 | * Group of feature templates for sequence labeling, containing different ways of generating features.
25 | * NOTE(review): extends the raw ArrayList type; the element type parameter may have been lost - confirm against the upstream source.
26 | * @author xpqiu
27 | */
28 | public class TempletGroup extends ArrayList {
29 |
30 | private static final long serialVersionUID = 2584759562263226861L;
31 | /**
32 | * Template identifier
33 | */
34 | public int gid;
35 |
36 | }
--------------------------------------------------------------------------------
/fnlp-core/src/main/java/org/fnlp/nlp/pipe/templet/package.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Insert title here
6 |
7 |
8 |
特征生成模板。
9 |
10 |
This file is part of FudanNLP.
11 |
12 |
FudanNLP is free software: you can redistribute it and/or modify
13 | it under the terms of the GNU Lesser General Public License as published by
14 | the Free Software Foundation, either version 3 of the License, or
15 | (at your option) any later version.
16 |
17 |
FudanNLP is distributed in the hope that it will be useful,
18 | but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 | GNU Lesser General Public License for more details.
21 |
22 |
You should have received a copy of the GNU General Public License
23 | along with FudanNLP. If not, see
24 | http://www.gnu.org/licenses/.
25 |
26 |
Copyright 2009-2012 fnlp.org. All rights reserved.
27 |
28 |
29 |
--------------------------------------------------------------------------------
/fnlp-core/src/main/java/org/fnlp/nlp/similarity/ISimilarity.java:
--------------------------------------------------------------------------------
1 | /**
2 | * This file is part of FNLP (formerly FudanNLP).
3 | *
4 | * FNLP is free software: you can redistribute it and/or modify
5 | * it under the terms of the GNU Lesser General Public License as published by
6 | * the Free Software Foundation, either version 3 of the License, or
7 | * (at your option) any later version.
8 | *
9 | * FNLP is distributed in the hope that it will be useful,
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | * GNU Lesser General Public License for more details.
13 | *
14 | * You should have received a copy of the GNU General Public License
15 | * along with FudanNLP. If not, see http://www.gnu.org/licenses/.
16 | *
17 | * Copyright 2009-2014 www.fnlp.org. All rights reserved.
18 | */
19 |
20 | package org.fnlp.nlp.similarity;
21 |
22 | /**
23 |  * Computes a similarity score between two items of the same type.
24 |  * @param <E> type of the items being compared
25 |  * @author xpqiu
26 |  * @version 1.0
27 |  * @since 1.0
28 |  */
29 | public interface ISimilarity<E> {
30 |     /** Returns the similarity between item1 and item2 as a float; the scale is defined by the implementation. */
31 |     public float calc(E item1, E item2);
32 | }
--------------------------------------------------------------------------------
/fnlp-core/src/main/java/org/fnlp/nlp/similarity/JaccardSimilarity.java:
--------------------------------------------------------------------------------
1 | /**
2 | * This file is part of FNLP (formerly FudanNLP).
3 | *
4 | * FNLP is free software: you can redistribute it and/or modify
5 | * it under the terms of the GNU Lesser General Public License as published by
6 | * the Free Software Foundation, either version 3 of the License, or
7 | * (at your option) any later version.
8 | *
9 | * FNLP is distributed in the hope that it will be useful,
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | * GNU Lesser General Public License for more details.
13 | *
14 | * You should have received a copy of the GNU General Public License
15 | * along with FudanNLP. If not, see http://www.gnu.org/licenses/.
16 | *
17 | * Copyright 2009-2014 www.fnlp.org. All rights reserved.
18 | */
19 |
20 | package org.fnlp.nlp.similarity;
21 |
22 | import gnu.trove.iterator.hash.TObjectHashIterator;
23 | import gnu.trove.set.hash.THashSet;
24 |
25 | public class JaccardSimilarity implements ISimilarity> {
26 |
27 | public float calc(THashSet