package com.smtscript.lib.csv; import java.io.ByteArrayInputStream; import java.io.File; import java.io.FileInputStream; import java.io.InputStream; import java.io.RandomAccessFile; import java.nio.channels.FileLock; import java.nio.charset.Charset; import java.util.ArrayList; import java.util.List; import org.mozilla.javascript.NativeArray; import org.mozilla.javascript.NativeObject; import com.smtscript.utils.SMTStatic; public class ScriptCSVReader { private int _prevChar = -1; private long _readLines; private long _readBytes; private int _cellEOF; private String _cellValue; private InputStream _fis; private byte[] _data = new byte[100]; private Charset _charset = Charset.forName("UTF-8"); public ScriptCSVReader(File file, boolean syncRead) throws Exception { if(syncRead) { RandomAccessFile randomAccessFile = new RandomAccessFile(file, "rw"); try { FileLock fileLock = randomAccessFile.getChannel().lock(); try { int length = (int)randomAccessFile.length(); byte[] data = new byte[length]; randomAccessFile.read(data); _fis = new ByteArrayInputStream(data); } finally { fileLock.close(); } } finally { randomAccessFile.close(); } } else { _fis = new FileInputStream(file); } } public void setCharset(String charSet) { _charset = Charset.forName(charSet); } public long getLines() { return _readLines; } public long getBytes() { return _readBytes; } private int readByte() throws Exception { if(_prevChar >= 0) { int ret = _prevChar; _prevChar = -1; return ret; } int ch = _fis.read(); if(ch == '\r') { ch = _fis.read(); if(ch != '\n') { _prevChar = ch; ch = '\n'; } } if(ch > 0) _readBytes ++; if(ch == '\n') _readLines ++; return ch; } private boolean readCell() throws Exception { int ch; boolean first = true; int pos = 0; int mode = -1; // -1:无数据, 0:无引号,1:引号开始,2:引号结束 while((ch = readByte()) >= 0) { // 如果当前是第一个位置,则判断是否带引号 if(first) { first = false; // 如果发现引号,则进入引号模式 if(ch == '"') { mode = 1; continue; } // 如果发现结束符,则直接结束 else if(ch == ',' || ch == '\n') { mode = 0; break; } // 否则进入无引号模式 else mode = 0; } // 如果当前位置不是第一个位置,且进入无引号模式 else if(mode == 0) { // 如果发现结束符,则结束当前扫描 if(ch == ',' || ch == '\n') break; } // 如果当前位置不是第一个位置,且进入有引号模式 else if(mode == 1) { // 如果发现引号 if(ch == '"') { // 读取下一个数据 ch = readByte(); // 如果下一个数据不是引号,则代表已经进入结束模式 if(ch != '"') { while(true) { // 如果当前读到结束符则退出 if(ch == ',' || ch == '\n' || ch < 0) { mode = 2; break; } // 如果未读到空格,则抛异常 if(ch != ' ' && ch != '\t' && ch != '\r') throw new Exception("csv format error"); // 读下一个字符 ch = readByte(); } break; } } } else { throw new Exception("csv not string EOF"); } // 读取数据 if(pos >= _data.length) { byte[] data1 = _data; _data = new byte[_data.length + 1000]; System.arraycopy(data1, 0, _data, 0, data1.length); } _data[pos ++] = (byte)ch; } if(mode == -1) { _cellEOF = ch; return false; } if(mode == 1) throw new Exception("String mode is not EOF"); _cellEOF = ch; _cellValue = new String(_data, 0, pos, _charset); return true; } protected String[] readLineToArray() throws Exception { List line = new ArrayList(); while(readCell()) { line.add(_cellValue); if(_cellEOF == '\n' || _cellEOF < 0) break; } if(line.size() > 0) return line.toArray(new String[line.size()]); if(_cellEOF == '\n') return new String[0]; return null; } public NativeArray readAllLinesMap(NativeArray colNames)throws Exception { List list = new ArrayList(); NativeObject nvLine; while((nvLine = readLineMap(colNames)) != null) { list.add(nvLine); } return new NativeArray(list.toArray(new NativeObject[list.size()])); } public NativeObject readLineMap(NativeArray colNames)throws Exception { String[] values = readLineToArray(); if(values == null) return null; NativeObject map = new NativeObject(); for(int i = 0; i < colNames.size(); i ++) { String name = (String) colNames.get(i); if(SMTStatic.isNullOrEmpty(name)) continue; if(values.length > i) map.put(name, map, values[i]); } return map; } public NativeArray readLine()throws Exception { String[] cells = readLineToArray(); if(cells == null) return null; return new NativeArray(cells); } public void close() throws Exception { if(_fis != null) { if(_fis instanceof FileInputStream) _fis.close(); _fis = null; } } }