unstick r1873947

Revision 1873947

Date: 2020/02/12 15:27:05
Author: szita
Revision Log: PIG-4764: Make Pig work with Hive 3.1 (szita)
Files:

Legend: Added | Removed | Modified
  • pig/trunk/build.xml

     
    154 154 <condition property="isWindows">
    155 155 <os family="windows"/>
    156 156 </condition>
    157 157
    158 158 <target name="setTezEnv">
    159 159 <propertyreset name="test.timeout" value="900000" />
    160 160 <propertyreset name="hadoopversion" value="2" />
     
    241 241 </if>
    242 242 <property name="hbaseversion" value="1" />
    243 243 <property name="sparkversion" value="1" />
    244 <property name="hiveversion" value="1" />
    244 245
    245 246 <condition property="src.exclude.dir" value="**/Spark2*.java" else="**/Spark1*.java">
    246 247 <equals arg1="${sparkversion}" arg2="1"/>
     
    248 249
    249 250 <property name="src.shims.dir" value="${basedir}/shims/src/hadoop${hadoopversion}" />
    250 251 <property name="src.shims.test.dir" value="${basedir}/shims/test/hadoop${hadoopversion}" />
    252 <property name="src.hive.shims.dir" value="${basedir}/shims/src/hive${hiveversion}" />
    251 253
    252 254 <property name="asfrepo" value="https://repository.apache.org"/>
    253 255 <property name="asfsnapshotrepo" value="${asfrepo}/content/repositories/snapshots"/>
     
    353 355 <source path="${test.e2e.dir}/udfs/java"/>
    354 356 <source path="${src.shims.dir}"/>
    355 357 <source path="${src.shims.test.dir}"/>
    358 <source path="${src.hive.shims.dir}"/>
    356 359 <source path="tutorial/src"/>
    357 360 <source path="${test.src.dir}" excluding="e2e/pig/udfs/java/|resources/|perf/"/>
    358 361 <output path="${build.dir.eclipse-main-classes}" />
     
    568 571 <echo>*** Building Main Sources ***</echo>
    569 572 <echo>*** To compile with all warnings enabled, supply -Dall.warnings=1 on command line ***</echo>
    570 573 <echo>*** Else, you will only be warned about deprecations ***</echo>
    571 <echo>*** Hadoop version used: ${hadoopversion} ; HBase version used: ${hbaseversion} ; Spark version used: ${sparkversion} ***</echo>
    572 <compileSources sources="${src.dir};${src.gen.dir};${src.lib.dir}/bzip2;${src.shims.dir}"
    574 <echo>*** Hadoop version used: ${hadoopversion} ; HBase version used: ${hbaseversion} ; Spark version used: ${sparkversion} ; Hive version used: ${hiveversion} ***</echo>
    575 <compileSources sources="${src.dir};${src.gen.dir};${src.lib.dir}/bzip2;${src.shims.dir};${src.hive.shims.dir}"
    573 576 excludes="${src.exclude.dir}" dist="${build.classes}" cp="classpath" warnings="${javac.args.warnings}" />
    574 577 <copy todir="${build.classes}/META-INF">
    575 578 <fileset dir="${src.dir}/META-INF" includes="**"/>
     
    734 737 <fileset dir="${ivy.lib.dir}" includes="metrics-core-*.jar"/>
    735 738 <fileset dir="${ivy.lib.dir}" includes="hbase-*.jar" excludes="hbase-*tests.jar,hbase-*hadoop2*.jar"/>
    736 739 <fileset dir="${ivy.lib.dir}" includes="hive-*.jar" excludes="hive-shims-0.*.jar, hive-contrib*.jar"/>
    740 <fileset dir="${ivy.lib.dir}" includes="minlog-*.jar"/>
    737 741 <fileset dir="${ivy.lib.dir}" includes="protobuf-java-*.jar"/>
    738 742 <fileset dir="${ivy.lib.dir}" includes="zookeeper-*.jar"/>
    739 743 <fileset dir="${ivy.lib.dir}" includes="accumulo-*.jar" excludes="accumulo-minicluster*.jar"/>
     
    1161 1165 <fileset dir="${basedir}/shims" />
    1162 1166 </copy>
    1163 1167
    1168 <copy todir="${tar.dist.dir}/hive-shims" includeEmptyDirs="true">
    1169 <fileset dir="${basedir}/hive-shims" />
    1170 </copy>
    1171
    1164 1172 <copy todir="${tar.dist.dir}/lib-src" includeEmptyDirs="true">
    1165 1173 <fileset dir="${src.lib.dir}" />
    1166 1174 </copy>
     
    1236 1244 <include name="lib-src/**"/>
    1237 1245 <include name="license/**"/>
    1238 1246 <include name="shims/**"/>
    1247 <include name="hive-shims/**"/>
    1239 1248 <include name="src/**"/>
    1240 1249 <include name="test/**"/>
    1241 1250 <exclude name="test/**/*.jar"/>
     
    1723 1732
    1724 1733 <target name="ivy-resolve" depends="ivy-init" unless="ivy.resolved" description="Resolve Ivy dependencies">
    1725 1734 <property name="ivy.resolved" value="true"/>
    1726 <echo>*** Ivy resolve with Hadoop ${hadoopversion}, Spark ${sparkversion} and HBase ${hbaseversion} ***</echo>
    1735 <echo>*** Ivy resolve with Hadoop ${hadoopversion}, Spark ${sparkversion}, HBase ${hbaseversion}, Hive ${hiveversion} ***</echo>
    1727 1736 <ivy:resolve log="${loglevel}" settingsRef="${ant.project.name}.ivy.settings" conf="compile"/>
    1728 1737 <ivy:report toDir="build/ivy/report"/>
    1729 1738 </target>
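Note on the build.xml change: the new hiveversion property defaults to 1, sits alongside the existing hadoopversion/sparkversion switches, and picks which shims/src/hive* directory gets compiled and shipped. Presumably the Hive 3 profile is selected the same way the other version switches are, for example by passing -Dhiveversion=3 on the Ant command line (illustrative invocation, not part of this commit): ant clean jar -Dhiveversion=3.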
  • pig/trunk/CHANGES.txt

     
    26 26
    27 27 IMPROVEMENTS
    28 28
    29 PIG-4764: Make Pig work with Hive 3.1 (szita)
    30
    29 31 PIG-5352: Please add OWASP Dependency Check to the build ivy.xml (knoguchi)
    30 32
    31 33 PIG-5385: Skip calling extra gc() before spilling large bag when unnecessary (knoguchi)
  • pig/trunk/ivy.xml

     
    31 31 <conf name="default" extends="master,runtime"/>
    32 32 <conf name="runtime" extends="compile,test" description="runtime but not the artifact" />
    33 33 <!--Private configurations. -->
    34 <conf name="compile" extends="hadoop${hadoopversion},hbase${hbaseversion}" visibility="private" description="compile artifacts"/>
    34 <conf name="compile" extends="hadoop${hadoopversion},hbase${hbaseversion},hive${hiveversion}" visibility="private" description="compile artifacts"/>
    35 35 <conf name="test" extends="compile" visibility="private"/>
    36 36 <conf name="javadoc" visibility="private" extends="compile,test"/>
    37 37 <conf name="releaseaudit" visibility="private"/>
     
    43 43 <conf name="hbase2" visibility="private"/>
    44 44 <conf name="spark1" visibility="private" />
    45 45 <conf name="spark2" visibility="private" />
    46 <conf name="hive1" visibility="private"/>
    47 <conf name="hive3" visibility="private"/>
    46 48 <conf name="owasp" visibility="private" description="Artifacts required for owasp target"/>
    47 49 </configurations>
    48 50 <publications>
     
    525 527 <!-- for piggybank -->
    526 528 <dependency org="org.hsqldb" name="hsqldb" rev="${hsqldb.version}"
    527 529 conf="test->default" />
    528 <dependency org="org.apache.hive" name="hive-exec" rev="${hive.version}" conf="compile->master" changing="true">
    530
    531 <!-- Hive 1 -->
    532 <dependency org="org.apache.hive" name="hive-exec" rev="${hive1.version}" conf="hive1->master" changing="true">
    529 533 <artifact name="hive-exec" m:classifier="core" />
    530 534 </dependency>
    535 <dependency org="org.apache.hive" name="hive-serde" rev="${hive1.version}" changing="true"
    536 conf="hive1->master" />
    537 <dependency org="org.apache.hive" name="hive-common" rev="${hive1.version}" changing="true"
    538 conf="hive1->master" />
    539 <dependency org="org.apache.hive.shims" name="hive-shims-common" rev="${hive1.version}" changing="true"
    540 conf="hive1->master" />
    541 <dependency org="org.apache.hive" name="hive-contrib" rev="${hive1.version}" changing="true"
    542 conf="test->master" />
    543 <dependency org="org.apache.hive.shims" name="hive-shims-0.23" rev="${hive1.version}" changing="true"
    544 conf="hive1->master" />
    545
    546 <!-- Hive 3 -->
    547 <dependency org="org.apache.hive" name="hive-exec" rev="${hive.version}" conf="hive3->master" changing="true">
    548 <artifact name="hive-exec" m:classifier="core" />
    549 </dependency>
    531 550 <dependency org="org.apache.hive" name="hive-serde" rev="${hive.version}" changing="true"
    532 conf="compile->master" />
    551 conf="hive3->master" />
    533 552 <dependency org="org.apache.hive" name="hive-common" rev="${hive.version}" changing="true"
    534 conf="compile->master" />
    553 conf="hive3->master" />
    535 554 <dependency org="org.apache.hive.shims" name="hive-shims-common" rev="${hive.version}" changing="true"
    536 conf="compile->master" />
    555 conf="hive3->master" />
    537 556 <dependency org="org.apache.hive" name="hive-contrib" rev="${hive.version}" changing="true"
    538 557 conf="test->master" />
    558 <dependency org="org.apache.hive" name="hive-llap-common" rev="${hive.version}" changing="true"
    559 conf="hive3->master" />
    539 560 <dependency org="org.apache.hive.shims" name="hive-shims-0.23" rev="${hive.version}" changing="true"
    540 conf="hadoop2->master" />
    561 conf="hive3->master" />
    562
    563
    564 <dependency org="org.apache.orc" name="orc-core" rev="${orc.version}" changing="true" conf="hive3->default" />
    565 <dependency org="org.apache.hive" name="hive-storage-api" rev="${hive-storage-api.version}" changing="true" conf="hive3->master" />
    541 566 <dependency org="org.iq80.snappy" name="snappy" rev="${snappy.version}"
    542 567 conf="test->master" />
    543 <dependency org="com.esotericsoftware.kryo" name="kryo" rev="${kryo.version}"
    544 conf="compile->master" />
    568 <dependency org="com.esotericsoftware" name="kryo-shaded" rev="${kryo.version}"
    569 conf="hive3->default" />
    570 <dependency org="com.esotericsoftware.kryo" name="kryo" rev="2.22"
    571 conf="hive1->default" />
    545 572 <dependency org="org.apache.commons" name="commons-lang3" rev="${commons-lang3.version}"
    546 573 conf="compile->master" />
    547 574
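Note on the ivy.xml change: the compile configuration now extends hive${hiveversion} in addition to the Hadoop and HBase confs, so only one of the new hive1/hive3 dependency sets (hive-exec, hive-serde, hive-common and the shims for either version, plus orc-core, hive-storage-api, hive-llap-common and kryo-shaded for Hive 3) is resolved for a given build, driven by the same hiveversion property used in build.xml.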
  • pig/trunk/ivy/libraries.properties

     
    41 41 hbase1.version=1.2.4
    42 42 hbase2.version=2.0.0
    43 43 hsqldb.version=2.4.0
    44 hive.version=1.2.1
    44 hive1.version=1.2.1
    45 hive.version=3.1.2
    46 hive-storage-api.version=2.7.0
    47 orc.version=1.5.6
    45 48 httpcomponents.version=4.4
    46 49 jackson.version=1.9.13
    47 50 jackson-pig-3039-test.version=1.9.9
     
    59 62 junit.version=4.11
    60 63 jruby.version=1.7.26
    61 64 jython.version=2.7.1
    62 kryo.version=2.22
    65 kryo.version=3.0.3
    63 66 rhino.version=1.7R2
    64 67 antlr.version=3.4
    65 68 stringtemplate.version=4.0.4
  • pig/trunk/shims/src/hive1/org/apache/pig/hive/HiveShims.java

     
    1 /*
    2 * Licensed to the Apache Software Foundation (ASF) under one
    3 * or more contributor license agreements. See the NOTICE file
    4 * distributed with this work for additional information
    5 * regarding copyright ownership. The ASF licenses this file
    6 * to you under the Apache License, Version 2.0 (the
    7 * "License"); you may not use this file except in compliance
    8 * with the License. You may obtain a copy of the License at
    9 *
    10 * http://www.apache.org/licenses/LICENSE-2.0
    11 *
    12 * Unless required by applicable law or agreed to in writing, software
    13 * distributed under the License is distributed on an "AS IS" BASIS,
    14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    15 * See the License for the specific language governing permissions and
    16 * limitations under the License.
    17 */
    18 package org.apache.pig.hive;
    19
    20 import java.math.BigDecimal;
    21 import java.math.BigInteger;
    22 import java.sql.Timestamp;
    23
    24 import org.apache.hadoop.hive.conf.HiveConf;
    25 import org.apache.hadoop.hive.ql.io.orc.CompressionKind;
    26 import org.apache.hadoop.hive.ql.io.orc.OrcFile;
    27 import org.apache.hadoop.hive.ql.io.orc.OrcFile.Version;
    28 import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
    29 import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
    30 import org.apache.hadoop.hive.ql.udf.generic.Collector;
    31 import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFParameterInfo;
    32 import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
    33 import org.apache.hadoop.hive.ql.udf.generic.SimpleGenericUDAFParameterInfo;
    34 import org.apache.hadoop.hive.serde2.AbstractSerDe;
    35 import org.apache.hadoop.hive.serde2.io.TimestampWritable;
    36 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
    37 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
    38 import org.apache.hadoop.hive.serde2.objectinspector.primitive.AbstractPrimitiveJavaObjectInspector;
    39 import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector;
    40 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
    41 import org.apache.hadoop.hive.shims.HadoopShimsSecure;
    42 import org.apache.hadoop.hive.shims.ShimLoader;
    43 import org.apache.hadoop.mapreduce.Job;
    44
    45 import com.esotericsoftware.kryo.Serializer;
    46 import com.esotericsoftware.kryo.io.Input;
    47
    48 import org.joda.time.DateTime;
    49
    50 public class HiveShims {
    51 public static String normalizeOrcVersionName(String version) {
    52 return Version.byName(version).getName();
    53 }
    54
    55 public static void addLessThanOpToBuilder(SearchArgument.Builder builder,
    56 String columnName, PredicateLeaf.Type columnType, Object value) {
    57 builder.lessThan(columnName, value);
    58 }
    59
    60 public static void addLessThanEqualsOpToBuilder(SearchArgument.Builder builder,
    61 String columnName, PredicateLeaf.Type columnType, Object value) {
    62 builder.lessThanEquals(columnName, value);
    63 }
    64
    65 public static void addEqualsOpToBuilder(SearchArgument.Builder builder,
    66 String columnName, PredicateLeaf.Type columnType, Object value) {
    67 builder.equals(columnName, value);
    68 }
    69
    70 public static void addBetweenOpToBuilder(SearchArgument.Builder builder,
    71 String columnName, PredicateLeaf.Type columnType, Object low, Object high) {
    72 builder.between(columnName, low, high);
    73 }
    74
    75 public static void addIsNullOpToBuilder(SearchArgument.Builder builder,
    76 String columnName, PredicateLeaf.Type columnType) {
    77 builder.isNull(columnName);
    78 }
    79
    80 public static Class[] getOrcDependentClasses(Class hadoopVersionShimsClass) {
    81 return new Class[]{OrcFile.class, HiveConf.class, AbstractSerDe.class,
    82 org.apache.hadoop.hive.shims.HadoopShims.class, HadoopShimsSecure.class, hadoopVersionShimsClass,
    83 Input.class};
    84 }
    85
    86 public static Class[] getHiveUDFDependentClasses(Class hadoopVersionShimsClass) {
    87 return new Class[]{GenericUDF.class,
    88 PrimitiveObjectInspector.class, HiveConf.class, Serializer.class, ShimLoader.class,
    89 hadoopVersionShimsClass, HadoopShimsSecure.class, Collector.class};
    90 }
    91
    92 public static Object getSearchArgObjValue(Object value) {
    93 if (value instanceof BigInteger) {
    94 return new BigDecimal((BigInteger) value);
    95 } else if (value instanceof DateTime) {
    96 return new Timestamp(((DateTime) value).getMillis());
    97 } else {
    98 return value;
    99 }
    100 }
    101
    102 public static void setOrcConfigOnJob(Job job, Long stripeSize, Integer rowIndexStride, Integer bufferSize, Boolean blockPadding, CompressionKind compress, String versionName) {
    103 if (stripeSize != null) {
    104 job.getConfiguration().setLong(HiveConf.ConfVars.HIVE_ORC_DEFAULT_STRIPE_SIZE.varname, stripeSize);
    105 }
    106 if (rowIndexStride != null) {
    107 job.getConfiguration().setInt(HiveConf.ConfVars.HIVE_ORC_DEFAULT_ROW_INDEX_STRIDE.varname, rowIndexStride);
    108 }
    109 if (bufferSize != null) {
    110 job.getConfiguration().setInt(HiveConf.ConfVars.HIVE_ORC_DEFAULT_BUFFER_SIZE.varname, bufferSize);
    111 }
    112 if (blockPadding != null) {
    113 job.getConfiguration().setBoolean(HiveConf.ConfVars.HIVE_ORC_DEFAULT_BLOCK_PADDING.varname, blockPadding);
    114 }
    115 if (compress != null) {
    116 job.getConfiguration().set(HiveConf.ConfVars.HIVE_ORC_DEFAULT_COMPRESS.varname, compress.toString());
    117 }
    118 if (versionName != null) {
    119 job.getConfiguration().set(HiveConf.ConfVars.HIVE_ORC_WRITE_FORMAT.varname, versionName);
    120 }
    121 }
    122
    123 public static class PigJodaTimeStampObjectInspector extends
    124 AbstractPrimitiveJavaObjectInspector implements TimestampObjectInspector {
    125
    126 public PigJodaTimeStampObjectInspector() {
    127 super(TypeInfoFactory.timestampTypeInfo);
    128 }
    129
    130 @Override
    131 public TimestampWritable getPrimitiveWritableObject(Object o) {
    132 return o == null ? null : new TimestampWritable(new Timestamp(((DateTime) o).getMillis()));
    133 }
    134
    135 @Override
    136 public Timestamp getPrimitiveJavaObject(Object o) {
    137 return o == null ? null : new Timestamp(((DateTime) o).getMillis());
    138 }
    139 }
    140
    141 public static GenericUDAFParameterInfo newSimpleGenericUDAFParameterInfo(ObjectInspector[] arguments,
    142 boolean distinct, boolean allColumns) {
    143 return new SimpleGenericUDAFParameterInfo(arguments, distinct, allColumns);
    144 }
    145
    146 public static class TimestampShim {
    147
    148 public static Timestamp cast(Object ts) {
    149 return (Timestamp) ts;
    150 }
    151
    152 public static long millisFromTimestamp(Object ts) {
    153 return cast(ts).getTime();
    154 }
    155 }
    156
    157 public static class TimestampWritableShim {
    158
    159 public static boolean isAssignableFrom(Object object) {
    160 return object instanceof TimestampWritable;
    161 }
    162
    163 public static TimestampWritable cast(Object ts) {
    164 return (TimestampWritable) ts;
    165 }
    166
    167 public static long millisFromTimestampWritable(Object ts) {
    168 return cast(ts).getTimestamp().getTime();
    169 }
    170 }
    171 }
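For context, a minimal sketch (not part of this commit) of how a caller such as OrcStorage is expected to go through this facade instead of the version-specific SearchArgument.Builder API; the column names, types and values below are made up for illustration:

    import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
    import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
    import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory;
    import org.apache.pig.hive.HiveShims;

    public class HiveShimsSargSketch {
        public static SearchArgument build() {
            // roughly equivalent to the Pig filter: srcid == 10 and mrkt is null
            SearchArgument.Builder builder = SearchArgumentFactory.newBuilder();
            builder.startAnd();
            // the shim decides whether the underlying builder call needs the
            // PredicateLeaf.Type argument (Hive 3) or not (Hive 1)
            HiveShims.addEqualsOpToBuilder(builder, "srcid", PredicateLeaf.Type.LONG, 10L);
            HiveShims.addIsNullOpToBuilder(builder, "mrkt", PredicateLeaf.Type.STRING);
            builder.end();
            return builder.build();
        }
    }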
  • pig/trunk/shims/src/hive3/org/apache/pig/hive/HiveShims.java

     
    1 /*
    2 * Licensed to the Apache Software Foundation (ASF) under one
    3 * or more contributor license agreements. See the NOTICE file
    4 * distributed with this work for additional information
    5 * regarding copyright ownership. The ASF licenses this file
    6 * to you under the Apache License, Version 2.0 (the
    7 * "License"); you may not use this file except in compliance
    8 * with the License. You may obtain a copy of the License at
    9 *
    10 * http://www.apache.org/licenses/LICENSE-2.0
    11 *
    12 * Unless required by applicable law or agreed to in writing, software
    13 * distributed under the License is distributed on an "AS IS" BASIS,
    14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    15 * See the License for the specific language governing permissions and
    16 * limitations under the License.
    17 */
    18 package org.apache.pig.hive;
    19
    20 import java.math.BigDecimal;
    21 import java.math.BigInteger;
    22
    23 import org.apache.hadoop.hive.common.type.HiveDecimal;
    24 import org.apache.hadoop.hive.common.type.Timestamp;
    25 import org.apache.hadoop.hive.conf.HiveConf;
    26 import org.apache.hadoop.hive.ql.io.orc.CompressionKind;
    27 import org.apache.hadoop.hive.ql.io.orc.OrcFile;
    28 import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
    29 import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
    30 import org.apache.hadoop.hive.ql.udf.generic.Collector;
    31 import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFParameterInfo;
    32 import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
    33 import org.apache.hadoop.hive.ql.udf.generic.SimpleGenericUDAFParameterInfo;
    34 import org.apache.hadoop.hive.serde2.AbstractSerDe;
    35 import org.apache.hadoop.hive.serde2.io.DateWritable;
    36 import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
    37 import org.apache.hadoop.hive.serde2.io.TimestampWritableV2;
    38 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
    39 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
    40 import org.apache.hadoop.hive.serde2.objectinspector.primitive.AbstractPrimitiveJavaObjectInspector;
    41 import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector;
    42 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
    43 import org.apache.hadoop.hive.shims.HadoopShimsSecure;
    44 import org.apache.hadoop.hive.shims.ShimLoader;
    45 import org.apache.hadoop.mapreduce.Job;
    46 import org.apache.orc.OrcConf;
    47 import org.apache.orc.OrcFile.Version;
    48
    49 import com.esotericsoftware.kryo.Serializer;
    50 import com.esotericsoftware.kryo.io.Input;
    51
    52 import org.joda.time.DateTime;
    53
    54
    55 public class HiveShims {
    56 public static String normalizeOrcVersionName(String version) {
    57 return Version.byName(version).getName();
    58 }
    59
    60 public static void addLessThanOpToBuilder(SearchArgument.Builder builder,
    61 String columnName, PredicateLeaf.Type columnType, Object value) {
    62 builder.lessThan(columnName, columnType, value);
    63 }
    64
    65 public static void addLessThanEqualsOpToBuilder(SearchArgument.Builder builder,
    66 String columnName, PredicateLeaf.Type columnType, Object value) {
    67 builder.lessThanEquals(columnName, columnType, value);
    68 }
    69
    70 public static void addEqualsOpToBuilder(SearchArgument.Builder builder,
    71 String columnName, PredicateLeaf.Type columnType, Object value) {
    72 builder.equals(columnName, columnType, value);
    73 }
    74
    75 public static void addBetweenOpToBuilder(SearchArgument.Builder builder,
    76 String columnName, PredicateLeaf.Type columnType, Object low, Object high) {
    77 builder.between(columnName, columnType, low, high);
    78 }
    79
    80 public static void addIsNullOpToBuilder(SearchArgument.Builder builder,
    81 String columnName, PredicateLeaf.Type columnType) {
    82 builder.isNull(columnName, columnType);
    83 }
    84
    85 public static Class[] getOrcDependentClasses(Class hadoopVersionShimsClass) {
    86 return new Class[]{OrcFile.class, HiveConf.class, AbstractSerDe.class,
    87 org.apache.hadoop.hive.shims.HadoopShims.class, HadoopShimsSecure.class, DateWritable.class,
    88 hadoopVersionShimsClass, Input.class, org.apache.orc.OrcFile.class,
    89 com.esotericsoftware.minlog.Log.class};
    90 }
    91
    92 public static Class[] getHiveUDFDependentClasses(Class hadoopVersionShimsClass) {
    93 return new Class[]{GenericUDF.class,
    94 PrimitiveObjectInspector.class, HiveConf.class, Serializer.class, ShimLoader.class,
    95 hadoopVersionShimsClass, HadoopShimsSecure.class, Collector.class, HiveDecimalWritable.class};
    96 }
    97
    98 public static Object getSearchArgObjValue(Object value) {
    99 if (value instanceof Integer) {
    100 return new Long((Integer) value);
    101 } else if (value instanceof Float) {
    102 return new Double((Float) value);
    103 } else if (value instanceof BigInteger) {
    104 return new HiveDecimalWritable(HiveDecimal.create((BigInteger) value));
    105 } else if (value instanceof BigDecimal) {
    106 return new HiveDecimalWritable(HiveDecimal.create((BigDecimal) value));
    107 } else if (value instanceof DateTime) {
    108 return new java.sql.Date(((DateTime) value).getMillis());
    109 } else {
    110 return value;
    111 }
    112 }
    113
    114 public static void setOrcConfigOnJob(Job job, Long stripeSize, Integer rowIndexStride, Integer bufferSize,
    115 Boolean blockPadding, CompressionKind compress, String versionName) {
    116 if (stripeSize != null) {
    117 job.getConfiguration().setLong(OrcConf.STRIPE_SIZE.getAttribute(), stripeSize);
    118 }
    119 if (rowIndexStride != null) {
    120 job.getConfiguration().setInt(OrcConf.ROW_INDEX_STRIDE.getAttribute(), rowIndexStride);
    121 }
    122 if (bufferSize != null) {
    123 job.getConfiguration().setInt(OrcConf.BUFFER_SIZE.getAttribute(), bufferSize);
    124 }
    125 if (blockPadding != null) {
    126 job.getConfiguration().setBoolean(OrcConf.BLOCK_PADDING.getAttribute(), blockPadding);
    127 }
    128 if (compress != null) {
    129 job.getConfiguration().set(OrcConf.COMPRESS.getAttribute(), compress.toString());
    130 }
    131 if (versionName != null) {
    132 job.getConfiguration().set(OrcConf.WRITE_FORMAT.getAttribute(), versionName);
    133 }
    134 }
    135
    136 public static class PigJodaTimeStampObjectInspector extends
    137 AbstractPrimitiveJavaObjectInspector implements TimestampObjectInspector {
    138
    139 public PigJodaTimeStampObjectInspector() {
    140 super(TypeInfoFactory.timestampTypeInfo);
    141 }
    142
    143 private static Timestamp getHiveTimeStampFromDateTime(Object o) {
    144 if (o == null) {
    145 return null;
    146 }
    147 Timestamp ts = new Timestamp();
    148 ts.setTimeInMillis(((DateTime) o).getMillis());
    149 return ts;
    150 }
    151
    152 @Override
    153 public TimestampWritableV2 getPrimitiveWritableObject(Object o) {
    154 return o == null ? null : new TimestampWritableV2(getHiveTimeStampFromDateTime(o));
    155 }
    156
    157 @Override
    158 public Timestamp getPrimitiveJavaObject(Object o) {
    159 return o == null ? null : new Timestamp(getHiveTimeStampFromDateTime(o));
    160 }
    161 }
    162
    163 public static GenericUDAFParameterInfo newSimpleGenericUDAFParameterInfo(ObjectInspector[] arguments,
    164 boolean distinct, boolean allColumns) {
    165 return new SimpleGenericUDAFParameterInfo(arguments, false, distinct, allColumns);
    166 }
    167
    168 public static class TimestampShim {
    169
    170 public static Timestamp cast(Object ts) {
    171 return (Timestamp) ts;
    172 }
    173
    174 public static long millisFromTimestamp(Object ts) {
    175 return cast(ts).toEpochMilli();
    176 }
    177 }
    178
    179 public static class TimestampWritableShim {
    180
    181 public static boolean isAssignableFrom(Object object) {
    182 return object instanceof TimestampWritableV2;
    183 }
    184
    185 public static TimestampWritableV2 cast(Object ts) {
    186 return (TimestampWritableV2) ts;
    187 }
    188
    189 public static long millisFromTimestampWritable(Object ts) {
    190 return cast(ts).getTimestamp().toEpochMilli();
    191 }
    192 }
    193
    194 }
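The timestamp-related nested classes are what keep the shared code free of the Hive 3 type changes (java.sql.Timestamp/TimestampWritable on Hive 1 versus org.apache.hadoop.hive.common.type.Timestamp/TimestampWritableV2 on Hive 3). A small usage sketch, assuming obj is a primitive value handed back by a Hive ObjectInspector, along the lines of the HiveUtils and TestOrcStorage hunks further down:

    import org.apache.pig.hive.HiveShims;
    import org.joda.time.DateTime;

    public class TimestampShimSketch {
        public static DateTime toPigDateTime(Object obj) {
            // works against either shim: the writable check and the millis
            // extraction hide TimestampWritable vs. TimestampWritableV2
            if (HiveShims.TimestampWritableShim.isAssignableFrom(obj)) {
                return new DateTime(HiveShims.TimestampWritableShim.millisFromTimestampWritable(obj));
            }
            // plain (non-writable) timestamp objects go through TimestampShim instead
            return new DateTime(HiveShims.TimestampShim.millisFromTimestamp(obj));
        }
    }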
  • pig/trunk/src/org/apache/pig/builtin/HiveUDAF.java

     
    42 42 import org.apache.pig.data.DataBag;
    43 43 import org.apache.pig.data.Tuple;
    44 44 import org.apache.pig.data.TupleFactory;
    45 import org.apache.pig.hive.HiveShims;
    45 46 import org.apache.pig.impl.logicalLayer.schema.Schema;
    46 47 import org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema;
    47 48 import org.apache.pig.impl.util.hive.HiveUtils;
     
    105 106
    106 107 if (udaf instanceof GenericUDAFResolver2) {
    107 108 GenericUDAFParameterInfo paramInfo =
    108 new SimpleGenericUDAFParameterInfo(
    109 arguments, false, false);
    109 HiveShims.newSimpleGenericUDAFParameterInfo(arguments, false, false);
    110 110 evaluator = ((GenericUDAFResolver2)udaf).getEvaluator(paramInfo);
    111 111 } else {
    112 112 TypeInfo[] params = ((StructTypeInfo)inputTypeInfo)
  • pig/trunk/src/org/apache/pig/builtin/HiveUDFBase.java

     
    49 49 import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MRConfiguration;
    50 50 import org.apache.pig.data.DataType;
    51 51 import org.apache.pig.data.Tuple;
    52 import org.apache.pig.hive.HiveShims;
    52 53 import org.apache.pig.impl.PigContext;
    53 54 import org.apache.pig.impl.util.UDFContext;
    54 55 import org.apache.pig.impl.util.Utils;
     
    181 182
    182 183 @Override
    183 184 public List<String> getShipFiles() {
    184 List<String> files = FuncUtils.getShipFiles(new Class[] {GenericUDF.class,
    185 PrimitiveObjectInspector.class, HiveConf.class, Serializer.class, ShimLoader.class,
    186 Hadoop23Shims.class, HadoopShimsSecure.class, Collector.class});
    185 List<String> files = FuncUtils.getShipFiles(HiveShims.getHiveUDFDependentClasses(Hadoop23Shims.class));
    187 186 return files;
    188 187 }
    189 188
  • pig/trunk/src/org/apache/pig/builtin/OrcStorage.java

     
    17 17 */
    18 18 package org.apache.pig.builtin;
    19 19
    20 import java.io.ByteArrayOutputStream;
    20 21 import java.io.IOException;
    21 22 import java.math.BigDecimal;
    22 23 import java.math.BigInteger;
    23 import java.sql.Timestamp;
    24 24 import java.util.ArrayList;
    25 25 import java.util.Arrays;
    26 26 import java.util.List;
     
    33 33 import org.apache.commons.cli.HelpFormatter;
    34 34 import org.apache.commons.cli.Options;
    35 35 import org.apache.commons.cli.ParseException;
    36 import org.apache.commons.codec.binary.Base64;
    36 37 import org.apache.commons.logging.Log;
    37 38 import org.apache.commons.logging.LogFactory;
    38 39 import org.apache.hadoop.fs.FileStatus;
    39 40 import org.apache.hadoop.fs.FileSystem;
    40 41 import org.apache.hadoop.fs.Path;
    41 42 import org.apache.hadoop.fs.PathFilter;
    42 import org.apache.hadoop.hive.conf.HiveConf;
    43 43 import org.apache.hadoop.hive.ql.io.orc.CompressionKind;
    44 44 import org.apache.hadoop.hive.ql.io.orc.OrcFile;
    45 45 import org.apache.hadoop.hive.ql.io.orc.OrcNewInputFormat;
     
    47 47 import org.apache.hadoop.hive.ql.io.orc.OrcSerde;
    48 48 import org.apache.hadoop.hive.ql.io.orc.OrcStruct;
    49 49 import org.apache.hadoop.hive.ql.io.orc.Reader;
    50 import org.apache.hadoop.hive.ql.io.orc.OrcFile.Version;
    50 import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
    51 51 import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
    52 52 import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.Builder;
    53 53 import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory;
    54 import org.apache.hadoop.hive.serde2.AbstractSerDe;
    55 54 import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
    56 55 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
    57 56 import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
    58 57 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
    59 58 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
    60 59 import org.apache.hadoop.hive.shims.Hadoop23Shims;
    61 import org.apache.hadoop.hive.shims.HadoopShimsSecure;
    62 60 import org.apache.hadoop.mapreduce.InputFormat;
    63 61 import org.apache.hadoop.mapreduce.Job;
    64 62 import org.apache.hadoop.mapreduce.OutputFormat;
     
    68 66 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    69 67 import org.apache.pig.Expression;
    70 68 import org.apache.pig.Expression.BetweenExpression;
    69 import org.apache.pig.Expression.BinaryExpression;
    71 70 import org.apache.pig.Expression.Column;
    72 71 import org.apache.pig.Expression.Const;
    73 72 import org.apache.pig.Expression.InExpression;
     
    80 79 import org.apache.pig.PigException;
    81 80 import org.apache.pig.PigWarning;
    82 81 import org.apache.pig.ResourceSchema;
    83 import org.apache.pig.StoreFunc;
    84 import org.apache.pig.Expression.BinaryExpression;
    85 82 import org.apache.pig.ResourceSchema.ResourceFieldSchema;
    86 83 import org.apache.pig.ResourceStatistics;
    84 import org.apache.pig.StoreFunc;
    87 85 import org.apache.pig.StoreFuncInterface;
    88 86 import org.apache.pig.StoreResources;
    89 87 import org.apache.pig.backend.executionengine.ExecException;
    90 88 import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigSplit;
    91 89 import org.apache.pig.data.DataType;
    92 90 import org.apache.pig.data.Tuple;
    91 import org.apache.pig.hive.HiveShims;
    93 92 import org.apache.pig.impl.logicalLayer.FrontendException;
    94 93 import org.apache.pig.impl.util.ObjectSerializer;
    95 94 import org.apache.pig.impl.util.UDFContext;
    96 95 import org.apache.pig.impl.util.Utils;
    97 96 import org.apache.pig.impl.util.hive.HiveUtils;
    98 import org.joda.time.DateTime;
    99 97
    100 import com.esotericsoftware.kryo.io.Input;
    98 import com.esotericsoftware.kryo.Kryo;
    99 import com.esotericsoftware.kryo.io.Output;
    101 100 import com.google.common.annotations.VisibleForTesting;
    102 101
    102 import org.joda.time.DateTime;
    103
    103 104 /**
    104 105 * A load function and store function for ORC file.
    105 106 * An optional constructor argument is provided that allows one to customize
     
    133 134 private Integer bufferSize;
    134 135 private Boolean blockPadding;
    135 136 private CompressionKind compress;
    136 private Version version;
    137 private String versionName;
    137 138
    138 139 private static final Options validOptions;
    139 140 private final CommandLineParser parser = new GnuParser();
     
    182 183 compress = CompressionKind.valueOf(configuredOptions.getOptionValue('c'));
    183 184 }
    184 185 if (configuredOptions.hasOption('v')) {
    185 version = Version.byName(configuredOptions.getOptionValue('v'));
    186 versionName = HiveShims.normalizeOrcVersionName(configuredOptions.getOptionValue('v'));
    186 187 }
    187 188 } catch (ParseException e) {
    188 189 log.error("Exception in OrcStorage", e);
     
    207 208 @Override
    208 209 public void setStoreLocation(String location, Job job) throws IOException {
    209 210 if (!UDFContext.getUDFContext().isFrontend()) {
    210 if (stripeSize!=null) {
    211 job.getConfiguration().setLong(HiveConf.ConfVars.HIVE_ORC_DEFAULT_STRIPE_SIZE.varname,
    212 stripeSize);
    213 }
    214 if (rowIndexStride!=null) {
    215 job.getConfiguration().setInt(HiveConf.ConfVars.HIVE_ORC_DEFAULT_ROW_INDEX_STRIDE.varname,
    216 rowIndexStride);
    217 }
    218 if (bufferSize!=null) {
    219 job.getConfiguration().setInt(HiveConf.ConfVars.HIVE_ORC_DEFAULT_BUFFER_SIZE.varname,
    220 bufferSize);
    221 }
    222 if (blockPadding!=null) {
    223 job.getConfiguration().setBoolean(HiveConf.ConfVars.HIVE_ORC_DEFAULT_BLOCK_PADDING.varname,
    224 blockPadding);
    225 }
    226 if (compress!=null) {
    227 job.getConfiguration().set(HiveConf.ConfVars.HIVE_ORC_DEFAULT_COMPRESS.varname,
    228 compress.toString());
    229 }
    230 if (version!=null) {
    231 job.getConfiguration().set(HiveConf.ConfVars.HIVE_ORC_WRITE_FORMAT.varname,
    232 version.getName());
    233 }
    211 HiveShims.setOrcConfigOnJob(job, stripeSize, rowIndexStride, bufferSize, blockPadding, compress,
    212 versionName);
    234 213 }
    235 214 FileOutputFormat.setOutputPath(job, new Path(location));
    236 215 if (typeInfo==null) {
     
    396 375
    397 376 @Override
    398 377 public List<String> getShipFiles() {
    399 Class[] classList = new Class[] {OrcFile.class, HiveConf.class, AbstractSerDe.class,
    400 org.apache.hadoop.hive.shims.HadoopShims.class, HadoopShimsSecure.class, Hadoop23Shims.class,
    401 Input.class};
    378 Class[] classList = HiveShims.getOrcDependentClasses(Hadoop23Shims.class);
    402 379 return FuncUtils.getShipFiles(classList);
    403 380 }
    404 381
     
    582 559 log.info("Pushdown predicate SearchArgument is:\n" + sArg);
    583 560 Properties p = UDFContext.getUDFContext().getUDFProperties(this.getClass());
    584 561 try {
    585 p.setProperty(signature + SearchArgsSuffix, sArg.toKryo());
    562 Kryo kryo = new Kryo();
    563 ByteArrayOutputStream baos = new ByteArrayOutputStream();
    564 Output output = new Output(baos);
    565 kryo.writeObject(output, sArg);
    566 p.setProperty(signature + SearchArgsSuffix, new String(Base64.encodeBase64(output.toBytes())));
    586 567 } catch (Exception e) {
    587 568 throw new IOException("Cannot serialize SearchArgument: " + sArg);
    588 569 }
     
    625 606 builder.end();
    626 607 break;
    627 608 case OP_EQ:
    628 builder.equals(getColumnName(lhs), getExpressionValue(rhs));
    609 HiveShims.addEqualsOpToBuilder(builder, getColumnName(lhs),
    610 getColumnType(lhs), getExpressionValue(rhs));
    629 611 break;
    630 612 case OP_NE:
    631 613 builder.startNot();
    632 builder.equals(getColumnName(lhs), getExpressionValue(rhs));
    614 HiveShims.addEqualsOpToBuilder(builder, getColumnName(lhs),
    615 getColumnType(lhs), getExpressionValue(rhs));
    633 616 builder.end();
    634 617 break;
    635 618 case OP_LT:
    636 builder.lessThan(getColumnName(lhs), getExpressionValue(rhs));
    619 HiveShims.addLessThanOpToBuilder(builder, getColumnName(lhs),
    620 getColumnType(lhs), getExpressionValue(rhs));
    637 621 break;
    638 622 case OP_LE:
    639 builder.lessThanEquals(getColumnName(lhs), getExpressionValue(rhs));
    623 HiveShims.addLessThanEqualsOpToBuilder(builder, getColumnName(lhs),
    624 getColumnType(lhs), getExpressionValue(rhs));
    640 625 break;
    641 626 case OP_GT:
    642 627 builder.startNot();
    643 builder.lessThanEquals(getColumnName(lhs), getExpressionValue(rhs));
    628 HiveShims.addLessThanEqualsOpToBuilder(builder, getColumnName(lhs),
    629 getColumnType(lhs), getExpressionValue(rhs));
    644 630 builder.end();
    645 631 break;
    646 632 case OP_GE:
    647 633 builder.startNot();
    648 builder.lessThan(getColumnName(lhs), getExpressionValue(rhs));
    634 HiveShims.addLessThanOpToBuilder(builder, getColumnName(lhs),
    635 getColumnType(lhs), getExpressionValue(rhs));
    649 636 builder.end();
    650 637 break;
    651 638 case OP_BETWEEN:
    652 639 BetweenExpression between = (BetweenExpression) rhs;
    653 builder.between(getColumnName(lhs), getSearchArgObjValue(between.getLower()), getSearchArgObjValue(between.getUpper()));
    640 HiveShims.addBetweenOpToBuilder(builder, getColumnName(lhs),
    641 getColumnType(lhs), HiveShims.getSearchArgObjValue(between.getLower()),
    642 HiveShims.getSearchArgObjValue(between.getUpper()));
    654 643 case OP_IN:
    655 644 InExpression in = (InExpression) rhs;
    656 builder.in(getColumnName(lhs), getSearchArgObjValues(in.getValues()).toArray());
    645 builder.in(getColumnName(lhs), getColumnType(lhs), getSearchArgObjValues(in.getValues()).toArray());
    657 646 default:
    658 647 throw new RuntimeException("Unsupported binary expression type: " + expr.getOpType() + " in " + expr);
    659 648 }
     
    661 650 Expression unaryExpr = ((UnaryExpression) expr).getExpression();
    662 651 switch (expr.getOpType()) {
    663 652 case OP_NULL:
    664 builder.isNull(getColumnName(unaryExpr));
    653 HiveShims.addIsNullOpToBuilder(builder, getColumnName(unaryExpr),
    654 getColumnType(unaryExpr));
    665 655 break;
    666 656 case OP_NOT:
    667 657 builder.startNot();
     
    686 676 }
    687 677 }
    688 678
    679 private PredicateLeaf.Type getColumnType(Expression expr) {
    680 try {
    681 return HiveUtils.getDataTypeForSearchArgs(expr.getDataType());
    682 } catch (ClassCastException e) {
    683 throw new RuntimeException("Expected a Column but found " + expr.getClass().getName() +
    684 " in expression " + expr, e);
    685 }
    686 }
    687
    689 688 private Object getExpressionValue(Expression expr) {
    690 689 switch(expr.getOpType()) {
    691 690 case TERM_COL:
    692 691 return ((Column) expr).getName();
    693 692 case TERM_CONST:
    694 return getSearchArgObjValue(((Const) expr).getValue());
    693 return HiveShims.getSearchArgObjValue(((Const) expr).getValue());
    695 694 default:
    696 695 throw new RuntimeException("Unsupported expression type: " + expr.getOpType() + " in " + expr);
    697 696 }
     
    703 702 }
    704 703 List<Object> newValues = new ArrayList<Object>(values.size());
    705 704 for (Object value : values) {
    706 newValues.add(getSearchArgObjValue(value));
    705 newValues.add(HiveShims.getSearchArgObjValue(value));
    707 706 }
    708 707 return values;
    709 708 }
    710
    711 private Object getSearchArgObjValue(Object value) {
    712 if (value instanceof BigInteger) {
    713 return new BigDecimal((BigInteger)value);
    714 } else if (value instanceof BigDecimal) {
    715 return value;
    716 } else if (value instanceof DateTime) {
    717 return new Timestamp(((DateTime)value).getMillis());
    718 } else {
    719 return value;
    720 }
    721 }
    722
    723 709 }
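The predicate-pushdown change above replaces sArg.toKryo() (apparently no longer offered by the Hive 3 SearchArgument) with an explicit Kryo serialization, Base64-encoded so it fits in the String-valued UDF property. The read side is not part of this hunk; presumably it mirrors the write side, roughly as below (class and method names here are assumptions for illustration, not code from this commit):

    import org.apache.commons.codec.binary.Base64;
    import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
    import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentImpl;
    import com.esotericsoftware.kryo.Kryo;
    import com.esotericsoftware.kryo.io.Input;

    public class SargDeserializeSketch {
        public static SearchArgument fromProperty(String encoded) {
            // decode the Base64 string written by OrcStorage and let Kryo
            // rebuild the SearchArgument implementation object
            Kryo kryo = new Kryo();
            Input input = new Input(Base64.decodeBase64(encoded));
            return kryo.readObject(input, SearchArgumentImpl.class);
        }
    }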
  • pig/trunk/src/org/apache/pig/Expression.java

     
    79 79
    80 80 protected OpType opType;
    81 81
    82 protected byte dataType;
    83
    82 84 /**
    83 85 * @return the opType
    84 86 */
     
    86 88 return opType;
    87 89 }
    88 90
    91 public byte getDataType() {
    92 return dataType;
    93 }
    94
    89 95 //TODO: Apply a optimizer to Expression from PredicatePushdownOptimizer and
    90 96 // convert OR clauses to BETWEEN OR IN
    91 97 public static class BetweenExpression extends Expression {
     
    221 227 /**
    222 228 * @param name
    223 229 */
    224 public Column(String name) {
    230 public Column(String name, byte dataType) {
    225 231 this.opType = OpType.TERM_COL;
    226 232 this.name = name;
    233 this.dataType = dataType;
    227 234 }
    228 235
    229 236 @Override
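The extra dataType byte on Expression (and the widened Column constructor) is what lets OrcStorage map a pushed-down column to a PredicateLeaf.Type. A tiny sketch with illustrative values, mirroring what FilterExtractor now does with projExpr.getType():

    import org.apache.pig.Expression;
    import org.apache.pig.data.DataType;

    public class ColumnTypeSketch {
        public static void main(String[] args) {
            // a projected int column named "srcid" now carries its Pig type along
            Expression.Column col = new Expression.Column("srcid", DataType.INTEGER);
            System.out.println(col.getDataType() == DataType.INTEGER);   // true
        }
    }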
  • pig/trunk/src/org/apache/pig/impl/util/hive/HiveUtils.java

     
    30 30 import org.apache.hadoop.hive.common.type.HiveChar;
    31 31 import org.apache.hadoop.hive.common.type.HiveDecimal;
    32 32 import org.apache.hadoop.hive.common.type.HiveVarchar;
    33 import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
    33 34 import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
    34 35 import org.apache.hadoop.hive.serde2.io.TimestampWritable;
    35 36 import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector;
     
    69 70 import org.apache.pig.data.DataType;
    70 71 import org.apache.pig.data.Tuple;
    71 72 import org.apache.pig.data.TupleFactory;
    73 import org.apache.pig.hive.HiveShims;
    72 74 import org.apache.pig.tools.pigstats.PigStatusReporter;
    73 75 import org.joda.time.DateTime;
    74 76
     
    179 181 result = new DataByteArray(b, 0, b.length);
    180 182 break;
    181 183 case TIMESTAMP:
    182 java.sql.Timestamp origTimeStamp = (java.sql.Timestamp)poi.getPrimitiveJavaObject(obj);
    183 result = new DateTime(origTimeStamp.getTime());
    184 result = new DateTime(HiveShims.TimestampShim.millisFromTimestamp(poi.getPrimitiveJavaObject(obj)));
    184 185 break;
    185 186 case DATE:
    186 187 java.sql.Date origDate = (java.sql.Date)poi.getPrimitiveJavaObject(obj);
     
    674 675
    675 676 }
    676 677
    677 static class PigJodaTimeStampObjectInspector extends
    678 AbstractPrimitiveJavaObjectInspector implements TimestampObjectInspector {
    679
    680 protected PigJodaTimeStampObjectInspector() {
    681 super(TypeInfoFactory.timestampTypeInfo);
    682 }
    683
    684 @Override
    685 public TimestampWritable getPrimitiveWritableObject(Object o) {
    686 return o == null ? null : new TimestampWritable(new Timestamp(((DateTime)o).getMillis()));
    687 }
    688
    689 @Override
    690 public Timestamp getPrimitiveJavaObject(Object o) {
    691 return o == null ? null : new Timestamp(((DateTime)o).getMillis());
    692 }
    693 }
    694
    695 678 static class PigDecimalObjectInspector extends
    696 679 AbstractPrimitiveJavaObjectInspector implements HiveDecimalObjectInspector {
    697 680
     
    735 718 case STRING:
    736 719 return PrimitiveObjectInspectorFactory.javaStringObjectInspector;
    737 720 case TIMESTAMP:
    738 return new PigJodaTimeStampObjectInspector();
    721 return new HiveShims.PigJodaTimeStampObjectInspector();
    739 722 case DECIMAL:
    740 723 return new PigDecimalObjectInspector();
    741 724 case BINARY:
     
    781 764 throw new IllegalArgumentException("Not implemented " + obj.getClass().getName());
    782 765 }
    783 766 }
    767
    768 public static PredicateLeaf.Type getDataTypeForSearchArgs(byte dataType) {
    769 switch (dataType) {
    770 case DataType.INTEGER:
    771 return PredicateLeaf.Type.LONG;
    772 case DataType.LONG:
    773 return PredicateLeaf.Type.LONG;
    774 case DataType.DOUBLE:
    775 return PredicateLeaf.Type.FLOAT;
    776 case DataType.FLOAT:
    777 return PredicateLeaf.Type.FLOAT;
    778 case DataType.CHARARRAY:
    779 return PredicateLeaf.Type.STRING;
    780 case DataType.DATETIME:
    781 return PredicateLeaf.Type.DATE;
    782 case DataType.BIGINTEGER:
    783 case DataType.BIGDECIMAL:
    784 return PredicateLeaf.Type.DECIMAL;
    785 case DataType.BOOLEAN:
    786 return PredicateLeaf.Type.BOOLEAN;
    787 default:
    788 throw new RuntimeException("Unsupported data type:" + DataType.findTypeName(dataType));
    789 }
    790 }
    784 791 }
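The new getDataTypeForSearchArgs helper is the glue between Pig's DataType bytes and the ORC predicate-leaf types; a short illustration of the mapping encoded in the switch above:

    import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
    import org.apache.pig.data.DataType;
    import org.apache.pig.impl.util.hive.HiveUtils;

    public class SearchArgTypeSketch {
        public static void main(String[] args) {
            // Pig ints and longs both become LONG leaves; chararrays become STRING
            PredicateLeaf.Type longType = HiveUtils.getDataTypeForSearchArgs(DataType.INTEGER);
            PredicateLeaf.Type strType  = HiveUtils.getDataTypeForSearchArgs(DataType.CHARARRAY);
            System.out.println(longType + " " + strType);   // LONG STRING
        }
    }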
  • pig/trunk/src/org/apache/pig/newplan/FilterExtractor.java

     
    356 356 } else if (op instanceof ProjectExpression) {
    357 357 ProjectExpression projExpr = (ProjectExpression)op;
    358 358 String fieldName = projExpr.getFieldSchema().alias;
    359 return new Expression.Column(fieldName);
    359 return new Expression.Column(fieldName, projExpr.getType());
    360 360 } else if(op instanceof BinaryExpression) {
    361 361 BinaryExpression binOp = (BinaryExpression)op;
    362 362 if(binOp instanceof AddExpression) {
  • pig/trunk/test/org/apache/pig/builtin/TestOrcStorage.java

     
    17 17 */
    18 18 package org.apache.pig.builtin;
    19 19
    20 import static org.junit.Assert.assertEquals;
    21 import static org.junit.Assert.assertFalse;
    22 import static org.junit.Assert.assertTrue;
    23 import static org.apache.pig.builtin.mock.Storage.resetData;
    24 import static org.apache.pig.builtin.mock.Storage.tuple;
    25
    26 20 import java.io.File;
    27 import java.io.FileNotFoundException;
    28 21 import java.io.IOException;
    29 22 import java.math.BigDecimal;
    30 23 import java.util.ArrayList;
    31 24 import java.util.HashMap;
    32 25 import java.util.Iterator;
    33 26 import java.util.List;
    27 import java.util.TimeZone;
    34 28
    35 29 import org.apache.commons.logging.Log;
    36 30 import org.apache.commons.logging.LogFactory;
    37 import org.apache.hadoop.conf.Configuration;
    38 import org.apache.hadoop.fs.FileStatus;
    39 31 import org.apache.hadoop.fs.FileSystem;
    40 32 import org.apache.hadoop.fs.Path;
    41 import org.apache.hadoop.fs.PathFilter;
    42 33 import org.apache.hadoop.hive.ql.io.orc.CompressionKind;
    43 34 import org.apache.hadoop.hive.ql.io.orc.OrcFile;
    44 35 import org.apache.hadoop.hive.ql.io.orc.OrcStruct;
     
    48 39 import org.apache.hadoop.hive.serde2.io.DoubleWritable;
    49 40 import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
    50 41 import org.apache.hadoop.hive.serde2.io.ShortWritable;
    51 import org.apache.hadoop.hive.serde2.io.TimestampWritable;
    52 42 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
    53 43 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
    54 44 import org.apache.hadoop.io.BooleanWritable;
     
    65 55 import org.apache.pig.data.DataType;
    66 56 import org.apache.pig.data.DefaultDataBag;
    67 57 import org.apache.pig.data.Tuple;
    58 import org.apache.pig.hive.HiveShims;
    68 59 import org.apache.pig.impl.logicalLayer.schema.Schema;
    69 60 import org.apache.pig.test.Util;
    61
    70 62 import org.joda.time.DateTime;
    71 63 import org.junit.After;
    72 64 import org.junit.Assert;
     
    74 66 import org.junit.BeforeClass;
    75 67 import org.junit.Test;
    76 68
    69 import static org.apache.pig.builtin.mock.Storage.resetData;
    70 import static org.apache.pig.builtin.mock.Storage.tuple;
    71 import static org.junit.Assert.assertEquals;
    72 import static org.junit.Assert.assertFalse;
    73 import static org.junit.Assert.assertTrue;
    74
    77 75 public class TestOrcStorage {
    78 76 final protected static Log LOG = LogFactory.getLog(TestOrcStorage.class);
    79 77
     
    89 87 private static PigServer pigServer = null;
    90 88 private static FileSystem fs;
    91 89
    90 static {
    91 System.setProperty("user.timezone", "UTC");
    92 TimeZone.setDefault(TimeZone.getTimeZone("UTC"));
    93 }
    94
    92 95 @BeforeClass
    93 96 public static void oneTimeSetup(){
    94 97 if(Util.WINDOWS){
     
    282 285 Tuple t = iter.next();
    283 286 assertTrue(t.toString().startsWith("(false,1,1024,65536,9223372036854775807,1.0,-15.0," +
    284 287 ",hi,({(1,bye),(2,sigh)}),{(3,good),(4,bad)},[],"));
    285 assertTrue(t.get(12).toString().matches("2000-03-12T15:00:00.000.*"));
    288 assertTrue(t.get(12).toString().matches("2000-03-12T15:00:00\\.000Z.*"));
    286 289 assertTrue(t.toString().endsWith(",12345678.6547456)"));
    287 290 }
    288 291
     
    406 409 } else if (expected instanceof BooleanWritable) {
    407 410 assertEquals(Boolean.class, actual.getClass());
    408 411 assertEquals(((BooleanWritable) expected).get(), actual);
    409 } else if (expected instanceof TimestampWritable) {
    412 } else if (HiveShims.TimestampWritableShim.isAssignableFrom(expected)) {
    410 413 assertEquals(DateTime.class, actual.getClass());
    411 assertEquals(((TimestampWritable) expected).getTimestamp().getTime(),
    414 assertEquals(HiveShims.TimestampWritableShim.millisFromTimestampWritable(expected),
    412 415 ((DateTime) actual).getMillis());
    413 416 } else if (expected instanceof BytesWritable) {
    414 417 assertEquals(DataByteArray.class, actual.getClass());
  • pig/trunk/test/org/apache/pig/builtin/TestOrcStoragePushdown.java

     
    17 17 */
    18 18 package org.apache.pig.builtin;
    19 19
    20 import static org.junit.Assert.assertEquals;
    21 import static org.junit.Assert.assertTrue;
    22
    23 20 import java.io.BufferedWriter;
    24 21 import java.io.File;
    25 22 import java.io.FileOutputStream;
     
    61 58 import org.junit.BeforeClass;
    62 59 import org.junit.Test;
    63 60
    61 import static org.junit.Assert.*;
    62
    63
    64 64 public class TestOrcStoragePushdown {
    65 65
    66 66 private static List<OpType> supportedOpTypes;
     
    221 221 String q = query + "b = filter a by srcid == 10;" + "store b into 'out';";
    222 222 Expression expr = getExpressionForTest(q, Arrays.asList("srcid"));
    223 223 SearchArgument sarg = orcStorage.getSearchArgument(expr);
    224 assertEquals("leaf-0 = (EQUALS srcid 10)\n" +
    225 "expr = leaf-0", sarg.toString());
    224 assertEqualsSarg(sarg, "leaf-0 = (EQUALS srcid 10)",
    225 "expr = leaf-0");
    226 226 }
    227 227
    228 228 @Test
     
    230 230 String q = query + "b = filter a by (srcid > 10 or dstid <= 5) and name == 'foo' and mrkt is null;" + "store b into 'out';";
    231 231 Expression expr = getExpressionForTest(q, Arrays.asList("srcid", "dstid", "name", "mrkt"));
    232 232 SearchArgument sarg = orcStorage.getSearchArgument(expr);
    233 assertEquals("leaf-0 = (LESS_THAN_EQUALS srcid 10)\n" +
    234 "leaf-1 = (LESS_THAN_EQUALS dstid 5)\n" +
    235 "leaf-2 = (EQUALS name foo)\n" +
    236 "leaf-3 = (IS_NULL mrkt)\n" +
    237 "expr = (and (or (not leaf-0) leaf-1) leaf-2 leaf-3)", sarg.toString());
    233 assertEqualsSarg(sarg, "leaf-0 = (LESS_THAN_EQUALS srcid 10)",
    234 "leaf-1 = (LESS_THAN_EQUALS dstid 5)",
    235 "leaf-2 = (EQUALS name foo)",
    236 "leaf-3 = (IS_NULL mrkt)",
    237 "expr = (and (or (not leaf-0) leaf-1) leaf-2 leaf-3)");
    238 238 }
    239 239
    240 240 @Test
     
    242 242 String q = query + "b = filter a by srcid != 10 and mrkt is not null;" + "store b into 'out';";
    243 243 Expression expr = getExpressionForTest(q, Arrays.asList("srcid", "dstid", "name", "mrkt"));
    244 244 SearchArgument sarg = orcStorage.getSearchArgument(expr);
    245 assertEquals("leaf-0 = (EQUALS srcid 10)\n" +
    246 "leaf-1 = (IS_NULL mrkt)\n" +
    247 "expr = (and (not leaf-0) (not leaf-1))", sarg.toString());
    245 assertEqualsSarg(sarg,"leaf-0 = (EQUALS srcid 10)",
    246 "leaf-1 = (IS_NULL mrkt)",
    247 "expr = (and (not leaf-0) (not leaf-1))");
    248 248 }
    249 249
    250 250 @Test
     
    253 253 String q = query + "b = filter a by srcid > 10 or srcid < 20;" + "store b into 'out';";
    254 254 Expression expr = getExpressionForTest(q, Arrays.asList("srcid"));
    255 255 SearchArgument sarg = orcStorage.getSearchArgument(expr);
    256 assertEquals("leaf-0 = (LESS_THAN_EQUALS srcid 10)\n" +
    257 "leaf-1 = (LESS_THAN srcid 20)\n" +
    258 "expr = (or (not leaf-0) leaf-1)", sarg.toString());
    256 assertEqualsSarg(sarg, "leaf-0 = (LESS_THAN_EQUALS srcid 10)",
    257 "leaf-1 = (LESS_THAN srcid 20)",
    258 "expr = (or (not leaf-0) leaf-1)");
    259 259 }
    260 260
    261 261 @Test
     
    264 264 String q = query + "b = filter a by srcid == 10 or srcid == 11;" + "store b into 'out';";
    265 265 Expression expr = getExpressionForTest(q, Arrays.asList("srcid"));
    266 266 SearchArgument sarg = orcStorage.getSearchArgument(expr);
    267 assertEquals("leaf-0 = (EQUALS srcid 10)\n" +
    268 "leaf-1 = (EQUALS srcid 11)\n" +
    269 "expr = (or leaf-0 leaf-1)", sarg.toString());
    267 assertEqualsSarg(sarg, "leaf-0 = (EQUALS srcid 10)",
    268 "leaf-1 = (EQUALS srcid 11)",
    269 "expr = (or leaf-0 leaf-1)");
    270 270 }
    271 271
    272 272 @Test
     
    282 282 q = query + "b = filter a by name matches 'foo*' and srcid == 10;" + "store b into 'out';";
    283 283 expr = getExpressionForTest(q, Arrays.asList("srcid", "name"));
    284 284 sarg = orcStorage.getSearchArgument(expr);
    285 assertEquals("leaf-0 = (EQUALS srcid 10)\n" +
    286 "expr = leaf-0", sarg.toString());
    285 assertEqualsSarg(sarg, "leaf-0 = (EQUALS srcid 10)",
    286 "expr = leaf-0");
    287 287
    288 288 q = query + "b = filter a by srcid == 10 and name matches 'foo*';" + "store b into 'out';";
    289 289 expr = getExpressionForTest(q, Arrays.asList("srcid", "name"));
    290 290 sarg = orcStorage.getSearchArgument(expr);
    291 assertEquals("leaf-0 = (EQUALS srcid 10)\n" +
    292 "expr = leaf-0", sarg.toString());
    291 assertEqualsSarg(sarg, "leaf-0 = (EQUALS srcid 10)",
    292 "expr = leaf-0");
    293 293
    294 294 // OR - Nothing should be pushed
    295 295 q = query + "b = filter a by name matches 'foo*' or srcid == 10;" + "store b into 'out';";
     
    307 307 "store b into 'out';";
    308 308 Expression expr = getExpressionForTest(q, Arrays.asList("srcid"));
    309 309 SearchArgument sarg = orcStorage.getSearchArgument(expr);
    310 assertEquals("leaf-0 = (EQUALS srcid 10)\n" +
    311 "expr = leaf-0", sarg.toString());
    310 assertEqualsSarg(sarg, "leaf-0 = (EQUALS srcid 10)",
    311 "expr = leaf-0");
    312 312 }
    313 313
    314 314 @Test
     
    419 419 public void testPredicatePushdownVarchar() throws Exception {
    420 420 testPredicatePushdown(basedir + "charvarchar.orc", "$1 == 'alice allen '", 19, 18000);
    421 421 }
    422
    423 private static void assertEqualsSarg(SearchArgument actual, String... expected) {
    424 String hive1Expected = String.join("\n", expected);
    425 String hive3Expected = String.join(", ", expected);
    426
    427 if (hive1Expected.equals(actual.toString())) {
    428 return;
    429 }
    430 if (hive3Expected.equals(actual.toString())) {
    431 return;
    432 }
    433 fail(actual.toString() + "\n does not match expected SARG:\n" + hive3Expected);
    434 }
    422 435 }
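The new assertEqualsSarg helper is there because SearchArgument.toString() joins its leaves with newlines under Hive 1 but with ", " under Hive 3 (as the hive1Expected/hive3Expected variables encode), so the same expected fragments can be checked against either output format.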
  • pig/trunk/test/org/apache/pig/test/TestLoaderStorerShipCacheFiles.java

     
    19 19
    20 20 import java.util.List;
    21 21
    22 import org.apache.hive.common.util.HiveVersionInfo;
    22 23 import org.apache.pig.ExecType;
    23 24 import org.apache.pig.PigServer;
    24 25 import org.apache.pig.backend.hadoop.executionengine.physicalLayer.plans.PhysicalPlan;
    25 26 import org.apache.pig.impl.PigContext;
    26 import org.apache.pig.impl.util.Utils;
    27
    27 28 import org.junit.Assert;
    28 29 import org.junit.Before;
    29 30 import org.junit.Ignore;
     
    45 46 "store a into 'ooo';";
    46 47 PhysicalPlan pp = Util.buildPp(pigServer, query);
    47 48
    48 String[] expectedJars = new String[] {"hive-common", "hive-exec", "hive-serde",
    49 "hive-shims-0.23", "hive-shims-common", "kryo"};
    49 String hiveVersion = HiveVersionInfo.getVersion().substring(0, 1);
    50 if (hiveVersion.equals("3")) {
    51 String[] expectedJars = new String[] {"hive-common", "hive-exec", "hive-serde",
    52 "hive-shims-0.23", "hive-shims-common", "orc-core",
    53 "hive-storage-api", "kryo", "minlog"
    54 };
    50 55
    51 checkPlan(pp, expectedJars, 6, pigServer.getPigContext());
    56 checkPlan(pp, expectedJars, 9, pigServer.getPigContext());
    57 } else {
    58 String[] expectedJars = new String[] {"hive-common", "hive-exec", "hive-serde",
    59 "hive-shims-0.23", "hive-shims-common", "kryo"};
    60
    61 checkPlan(pp, expectedJars, 6, pigServer.getPigContext());
    62 }
    52 63 }
    53 64
    54 65 @Test
     
    57 68 "store a into 'ooo' using OrcStorage;";
    58 69 PhysicalPlan pp = Util.buildPp(pigServer, query);
    59 70
    60 String[] expectedJars = new String[] {"hive-common", "hive-exec", "hive-serde",
    61 "hive-shims-0.23", "hive-shims-common", "kryo"};
    71 String hiveVersion = HiveVersionInfo.getVersion().substring(0, 1);
    72 if (hiveVersion.equals("3")) {
    73 String[] expectedJars = new String[] {"hive-common", "hive-exec", "hive-serde",
    74 "hive-shims-0.23", "hive-shims-common", "orc-core",
    75 "hive-storage-api", "kryo", "minlog"
    76 };
    62 77
    63 checkPlan(pp, expectedJars, 6, pigServer.getPigContext());
    78 checkPlan(pp, expectedJars, 9, pigServer.getPigContext());
    79 } else {
    80 String[] expectedJars = new String[] {"hive-common", "hive-exec", "hive-serde",
    81 "hive-shims-0.23", "hive-shims-common", "kryo"};
    82
    83
    84 checkPlan(pp, expectedJars, 6, pigServer.getPigContext());
    85 }
    64 86 }
    65 87
    66 88 @Test