Home | About | Sematext search-lucene.com search-hadoop.com
 Search Hadoop and all its subprojects:

Switch to Threaded View
Drill >> mail # dev >> [26/29] DRILL-221, part 3. More license updates. Automatically run Rat plugin with appropriate excludes. Move Synth-log to sandbox until it is part of main mvn build to avoid weird RAT problems.


Copy link to this message
-
[26/29] DRILL-221, part 3. More license updates. Automatically run Rat plugin with appropriate excludes. Move Synth-log to sandbox until it is part of main mvn build to avoid weird RAT problems.
http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/43d71f10/contrib/synth-log/src/test/java/org/apache/drill/synth/TermGeneratorTest.java
----------------------------------------------------------------------
diff --git a/contrib/synth-log/src/test/java/org/apache/drill/synth/TermGeneratorTest.java b/contrib/synth-log/src/test/java/org/apache/drill/synth/TermGeneratorTest.java
deleted file mode 100644
index b983eb2..0000000
--- a/contrib/synth-log/src/test/java/org/apache/drill/synth/TermGeneratorTest.java
+++ /dev/null
@@ -1,97 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.drill.synth;
-
-import com.google.common.base.Function;
-import com.google.common.collect.*;
-import org.apache.commons.math3.distribution.NormalDistribution;
-import org.apache.mahout.math.stats.LogLikelihood;
-import org.junit.Test;
-
-import java.util.List;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-
-public class TermGeneratorTest {
-
-    private static final WordGenerator WORDS = new WordGenerator("word-frequency-seed", "other-words");
-
-    @Test
-    public void generateTerms() {
-        TermGenerator x = new TermGenerator(WORDS, 1, 0.8);
-        final Multiset<String> counts = HashMultiset.create();
-        for (int i = 0; i < 10000; i++) {
-            counts.add(x.sample());
-        }
-
-        assertEquals(10000, counts.size());
-        assertTrue("Should have some common words", counts.elementSet().size() < 10000);
-        List<Integer> k = Lists.newArrayList(Iterables.transform(counts.elementSet(), new Function<String, Integer>() {
-            public Integer apply(String s) {
-                return counts.count(s);
-            }
-        }));
-//        System.out.printf("%s\n", Ordering.natural().reverse().sortedCopy(k).subList(0, 30));
-//        System.out.printf("%s\n", Iterables.transform(Iterables.filter(counts.elementSet(), new Predicate<String>() {
-//            public boolean apply(String s) {
-//                return counts.count(s) > 100;
-//            }
-//        }), new Function<String, String>() {
-//            public String apply(String s) {
-//                return s + ":" + counts.count(s);
-//            }
-//        }));
-        assertEquals(1, Ordering.natural().leastOf(k, 1).get(0).intValue());
-        assertTrue(Ordering.natural().greatestOf(k, 1).get(0) > 300);
-        assertTrue(counts.count("the") > 300);
-    }
-
-    @Test
-    public void distinctVocabularies() {
-        TermGenerator x1 = new TermGenerator(WORDS, 1, 0.8);
-        final Multiset<String> k1 = HashMultiset.create();
-        for (int i = 0; i < 50000; i++) {
-            k1.add(x1.sample());
-        }
-
-        TermGenerator x2 = new TermGenerator(WORDS, 1, 0.8);
-        final Multiset<String> k2 = HashMultiset.create();
-        for (int i = 0; i < 50000; i++) {
-            k2.add(x2.sample());
-        }
-
-        final NormalDistribution normal = new NormalDistribution();
-        List<Double> scores = Ordering.natural().sortedCopy(Iterables.transform(k1.elementSet(),
-                new Function<String, Double>() {
-                    public Double apply(String s) {
-                        return normal.cumulativeProbability(LogLikelihood.rootLogLikelihoodRatio(k1.count(s), 50000 - k1.count(s), k2.count(s), 50000 - k2.count(s)));
-                    }
-                }));
-        int n = scores.size();
-//        System.out.printf("%.5f, %.5f, %.5f, %.5f, %.5f, %.5f, %.5f", scores.get(0), scores.get((int) (0.05*n)), scores.get(n / 4), scores.get(n / 2), scores.get(3 * n / 4), scores.get((int) (0.95 * n)), scores.get(n - 1));
-        int i = 0;
-        for (Double score : scores) {
-            if (i % 10 == 0) {
-                System.out.printf("%.6f\t%.6f\n", (double) i / n, score);
-            }
-
-            i++;
-        }
-    }
-}

http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/43d71f10/contrib/synth-log/src/test/java/org/apache/drill/synth/WordGeneratorTest.java
diff --git a/contrib/synth-log/src/test/java/org/apache/drill/synth/WordGeneratorTest.java b/contrib/synth-log/src/test/java/org/apache/drill/synth/WordGeneratorTest.java
deleted file mode 100644
index 0bfdf06..0000000
--- a/contrib/synth-log/src/test/java/org/apache/drill/synth/WordGeneratorTest.java
+++ /dev/null
@@ -1,39 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.drill.synth;
-
-import org.junit.Test;
-
-import static org.junit.Assert.ass