Friday, September 15, 2017

What is Hadoop's Split Metadata?

The question came up while investigating the following error:

YarnRuntimeException: java.io.IOException: Split metadata size exceeded 10000000
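
The 10000000 here is the default of the configuration property mapreduce.job.split.metainfo.maxsize (MRJobConfig.SPLIT_METAINFO_MAXSIZE), the maximum job.splitmetainfo size the MapReduce ApplicationMaster will agree to read; a value of -1 should disable the check entirely. A minimal sketch of raising it for one job (the class name, job name, and driver skeleton are placeholders of mine):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class RaiseSplitMetaInfoLimit {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Allow up to 100 MB of split meta info instead of the ~10 MB default.
    // (Property name as documented in mapred-default.xml; -1 disables the check.)
    conf.setLong("mapreduce.job.split.metainfo.maxsize", 100L * 1000 * 1000);
    Job job = Job.getInstance(conf, "bigger-split-metainfo");
    // ... configure input/output formats and paths, then submit as usual ...
    System.out.println(job.getConfiguration()
        .get("mapreduce.job.split.metainfo.maxsize"));
  }
}

Raising the limit only treats the symptom, though; a huge meta file usually means a huge number of input splits, which is worth fixing on its own.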

So what is actually in that file? It is mostly binary; running strings over a cached copy shows only the printable parts:

[root@sandbox ~]# strings /hadoop/yarn/local/usercache/admin/appcache/application_1505439442865_0004/filecache/10/job.splitmetainfo
META-SPL
sandbox.hortonworks.com
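
Only two printable things survive: the META-SPL file header and the split location hostname (on the sandbox, every split is local to sandbox.hortonworks.com). Judging from the JobSplit/JobSplitWriter code below, the layout appears to be: the 8 header bytes, a vint version, a vint split count, and then per split a vint location count, the location strings, and two vlongs (the start offset into job.split and the input data length). A throwaway reader sketch under those assumptions (DumpSplitMetaInfo is my own class, not part of Hadoop):

import java.io.DataInputStream;
import java.io.FileInputStream;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableUtils;

public class DumpSplitMetaInfo {
  public static void main(String[] args) throws Exception {
    try (DataInputStream in = new DataInputStream(new FileInputStream(args[0]))) {
      byte[] header = new byte[8];                  // "META-SPL" is 8 bytes
      in.readFully(header);
      int version = WritableUtils.readVInt(in);    // JobSplit.META_SPLIT_VERSION
      int numSplits = WritableUtils.readVInt(in);
      System.out.println(new String(header, "UTF-8")
          + " version=" + version + " splits=" + numSplits);
      for (int i = 0; i < numSplits; i++) {
        int numLocations = WritableUtils.readVInt(in);
        StringBuilder locations = new StringBuilder();
        for (int j = 0; j < numLocations; j++) {
          locations.append(Text.readString(in)).append(' ');
        }
        long startOffset = WritableUtils.readVLong(in);
        long inputDataLength = WritableUtils.readVLong(in);
        System.out.println("split " + i + ": offset=" + startOffset
            + " length=" + inputDataLength + " locations=" + locations);
      }
    }
  }
}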



The class javadoc of org.apache.hadoop.mapreduce.split.JobSplit explains the two-part design:

/**
 * This class groups the fundamental classes associated with
 * reading/writing splits. The split information is divided into
 * two parts based on the consumer of the information. The two
 * parts are the split meta information, and the raw split 
 * information. The first part is consumed by the JobTracker to
 * create the tasks' locality data structures. The second part is
 * used by the maps at runtime to know what to do!
 * These pieces of information are written to two separate files.
 * The metainformation file is slurped by the JobTracker during 
 * job initialization. A map task gets the meta information during
 * the launch and it reads the raw split bytes directly from the 
 * file.
 */
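
The "two separate files" live in the job submission directory as job.split (the raw split bytes each map task reads) and job.splitmetainfo (the meta info). The JobTracker wording predates YARN; in MRv2 it is the MapReduce ApplicationMaster that slurps job.splitmetainfo during job initialization, and that is exactly where the size check behind the exception above happens.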


org.apache.hadoop.mapreduce.split.JobSplitWriter writes both files:

  private static void writeJobSplitMetaInfo(FileSystem fs, Path filename, 
      FsPermission p, int splitMetaInfoVersion, 
      JobSplit.SplitMetaInfo[] allSplitMetaInfo) throws IOException

  public static <T extends InputSplit> void createSplitFiles(Path jobSubmitDir, 
      Configuration conf, FileSystem fs, T[] splits) 
  throws IOException, InterruptedException {
    FSDataOutputStream out = createFile(fs, 
        JobSubmissionFiles.getJobSplitFile(jobSubmitDir), conf);  // job.split
    SplitMetaInfo[] info = writeNewSplits(conf, splits, out);
    out.close();
    writeJobSplitMetaInfo(fs, JobSubmissionFiles.getJobSplitMetaFile(jobSubmitDir),  // job.splitmetainfo
        new FsPermission(JobSubmissionFiles.JOB_FILE_PERMISSION), splitVersion,
        info);  // splitVersion is a static field: JobSplit.META_SPLIT_VERSION
  }
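
So createSplitFiles() writes the raw splits to job.split (via writeNewSplits(), shown next) and the meta info to job.splitmetainfo. For the latter, paraphrased from the Hadoop 2.x source as a sketch rather than a verbatim copy, writeJobSplitMetaInfo() produces exactly the layout we saw with strings: header, version, split count, then each record:

  private static void writeJobSplitMetaInfo(FileSystem fs, Path filename,
      FsPermission p, int splitMetaInfoVersion,
      JobSplit.SplitMetaInfo[] allSplitMetaInfo) throws IOException {
    FSDataOutputStream out = FileSystem.create(fs, filename, p);
    out.write(JobSplit.META_SPLIT_FILE_HEADER);            // the "META-SPL" bytes
    WritableUtils.writeVInt(out, splitMetaInfoVersion);    // meta split version
    WritableUtils.writeVInt(out, allSplitMetaInfo.length); // number of splits
    for (JobSplit.SplitMetaInfo splitMetaInfo : allSplitMetaInfo) {
      splitMetaInfo.write(out);  // locations, startOffset, inputDataLength
    }
    out.close();
  }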

  private static <T extends InputSplit> 
  SplitMetaInfo[] writeNewSplits(Configuration conf, 
      T[] array, FSDataOutputStream out)
  throws IOException, InterruptedException {

    SplitMetaInfo[] info = new SplitMetaInfo[array.length];
    if (array.length != 0) {
        ... (snip) ...
        info[i++] = 
          new JobSplit.SplitMetaInfo(
              locations, offset,
              split.getLength());
        offset += currCount - prevCount;
      }
    }
    return info;
  }
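
Note that offset is the byte position inside job.split at which this split's serialized bytes begin; that is how a map task later seeks straight to its raw split data. A SplitMetaInfo record therefore holds nothing but the location hostnames, that offset, and the input length, so the meta file's size is driven entirely by the split count and the length of the location strings.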


And the SplitMetaInfo constructor itself (an inner class of org.apache.hadoop.mapreduce.split.JobSplit):
    public SplitMetaInfo(String[] locations, long startOffset, 
        long inputDataLength) {
      this.locations = locations;
      this.startOffset = startOffset;
      this.inputDataLength = inputDataLength;
    }
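
Back-of-envelope, then: one record costs a vint for the location count, the length-prefixed hostname strings themselves, and two vlongs. As a rough, assumption-laden estimate, with 3 replica locations of about 25 bytes each that is on the order of 85-90 bytes per split, so the default 10000000-byte limit is reached somewhere around 110,000-120,000 splits. The exception at the top of this post is therefore usually a "too many input splits" problem, typically caused by lots of small files, and the durable fix is reducing the split count (e.g. with CombineFileInputFormat) rather than just raising mapreduce.job.split.metainfo.maxsize.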
