|
|
-
Fwd: strange SIGPIPE from "bin/ls -ld" in Shell.runCommand seen when running HBase tests
Ted Yu, 2011-12-07, 20:08
Hi,
Seeking advice from experts. Thanks

---------- Forwarded message ----------
From: Mikhail Bautin <[EMAIL PROTECTED]>
Date: Wed, Dec 7, 2011 at 11:52 AM
Subject: Re: strange SIGPIPE from "bin/ls -ld" in Shell.runCommand seen when running HBase tests
To: [EMAIL PROTECTED], [EMAIL PROTECTED]

I am already using umask 022. Permissions on all components of the path are also OK. Also, "ls -ld" succeeds sometimes, but other times it fails with a SIGPIPE and no error message. Additionally, I saw cases where it SIGPIPE'd but produced correct output (a "drwxr-xr-x ..." line).

Here is my patch for Hadoop to work around the ls -ld SIGPIPE issue (I just overrode the hadoop-0.20.205.0 jar in my local maven repository to run unit tests).

Index: src/core/org/apache/hadoop/fs/RawLocalFileSystem.java
==================================================================
--- src/core/org/apache/hadoop/fs/RawLocalFileSystem.java (revision 1198126)
+++ src/core/org/apache/hadoop/fs/RawLocalFileSystem.java (working copy)
@@ -416,7 +416,7 @@
       IOException e = null;
       try {
         StringTokenizer t = new StringTokenizer(
-            FileUtil.execCommand(new File(getPath().toUri()),
+            FileUtil.execCommandWithRetries(new File(getPath().toUri()),
                 Shell.getGET_PERMISSION_COMMAND()));
         //expected format
         //-rw------- 1 username groupname ...

Index: src/core/org/apache/hadoop/fs/FileUtil.java
==================================================================
--- src/core/org/apache/hadoop/fs/FileUtil.java (revision 1198126)
+++ src/core/org/apache/hadoop/fs/FileUtil.java (working copy)
@@ -19,6 +19,7 @@
 package org.apache.hadoop.fs;
 
 import java.io.*;
+import java.util.Arrays;
 import java.util.Enumeration;
 import java.util.zip.ZipEntry;
 import java.util.zip.ZipFile;
@@ -703,6 +704,20 @@
     String output = Shell.execCommand(args);
     return output;
   }
+
+  static String execCommandWithRetries(File f, String... cmd)
+      throws IOException {
+    for (int attempt = 0; attempt < 10; ++attempt) {
+      try {
+        return execCommand(f, cmd);
+      } catch (IOException ex) {
+        LOG.error("Failed to execute command: f=" + f + " cmd=" +
+            Arrays.toString(cmd) + " (attempt " + attempt + ")",
+            ex);
+      }
+    }
+    return execCommand(f, cmd);
+  }
 
   /**
    * Create a tmp file for a base file.

Index: src/core/org/apache/hadoop/util/Shell.java
==================================================================
--- src/core/org/apache/hadoop/util/Shell.java (revision 1198126)
+++ src/core/org/apache/hadoop/util/Shell.java (working copy)
@@ -239,6 +239,7 @@
       String line = inReader.readLine();
       while(line != null) {
         line = inReader.readLine();
+        LOG.error("Additional line from output: " + line);
       }
       // wait for the process to finish and check the exit code
       exitCode = process.waitFor();
@@ -251,6 +252,25 @@
       completed.set(true);
       //the timeout thread handling
       //taken care in finally block
+      LOG.error("exitCode=" + exitCode);
+      if (exitCode == 141 && this instanceof ShellCommandExecutor) {
+        String[] execStr = getExecString();
+        String outStr = ((ShellCommandExecutor) this).getOutput();
+        LOG.error("execStr=" + java.util.Arrays.toString(execStr) +
+            ", outStr=" + outStr);
+        if (execStr.length >= 2 &&
+            execStr[0].equals("/bin/ls") &&
+            execStr[1].equals("-ld") &&
+            outStr.startsWith("d") &&
+            outStr.length() >= 11 &&
+            outStr.charAt(10) == ' ') {
+          // A work-around for a weird SIGPIPE bug on ls -ld.
+          LOG.error("Ignoring exit code " + exitCode + " for /bin/ls -ld: " +
+              "got output " + outStr);
+          exitCode = 0;
+        }
+      }
+
       if (exitCode != 0) {
         throw new ExitCodeException(exitCode, errMsg.toString());
       }

Thanks,

On Wed, Dec 7, 2011 at 11:31 AM, Ted Yu <[EMAIL PROTECTED]> wrote:

/data/users/mbautin/workdirs/hb-os/target/test-data/37d6e996-cba6-4a12-85bc-dbcf2e91d297
org.apache.hadoop.hbase.coprocessor.TestRegionServerCoprocessorExceptionWithAbort
/data/users/mbautin/workdirs/hb-os/target/test-data/37d6e996-cba6-4a12-85bc-dbcf2e91d297/dfscluster_76df8fc0-6827-4d9d-8728-eb5ee43b0bae/dfs/data/data3],
org.apache.hadoop.fs.RawLocalFileSystem$RawLocalFileStatus.loadPermissionInfo(RawLocalFileSystem.java:418)
org.apache.hadoop.fs.RawLocalFileSystem$RawLocalFileStatus.getPermission(RawLocalFileSystem.java:393)
org.apache.hadoop.util.DiskChecker.mkdirsWithExistsAndPermissionCheck(DiskChecker.java:146)
org.apache.hadoop.hdfs.server.datanode.DataNode.makeInstance(DataNode.java:1537)
org.apache.hadoop.hdfs.server.datanode.DataNode.instantiateDataNode(DataNode.java:1484)
org.apache.hadoop.hdfs.server.datanode.DataNode.instantiateDataNode(DataNode.java:1459)
org.apache.hadoop.hdfs.MiniDFSCluster.startDataNodes(MiniDFSCluster.java:417)
org.apache.hadoop.hbase.HBaseTestingUtility.startMiniDFSCluster(HBaseTestingUtility.java:369)
org.apache.hadoop.hbase.HBaseTestingUtility.startMiniCluster(HBaseTestingUtility.java:537)
org.apache.hadoop.hbase.HBaseTestingUtility.startMiniCluster(HBaseTestingUtility.java:493)
org.apache.hadoop.hbase.HBaseTestingUtility.startMiniCluster(HBaseTestingUtility.java:480)
org.apache.hadoop.hbase.coprocessor.TestRegionServerCoprocessorExceptionWithAbort.setupBeforeClass(TestRegionServerCoprocessorExceptionWithAbort.java:94)
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39)
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:45) org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:15) org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:42) org.junit.interna |