需求:百万、千万、4千万级日志对设备进行除重
环境:设备内存64G,scala单机版运行shell文件
日志:
20G 48000000.log
4.0G 10000000.log
396M 1000000.log
代码如下
01
02
03
04
05
06
07
08
09
10
11
12
13
14
15
16
17
18
19
20
21
22
|
import scala.io.Source import scala.collection.mutable.ArrayBuffer var text = Source.fromFile( "/Users/shuhai/1000000.log" ).getLines; //imei is required var log = ArrayBuffer[String](); while (text.hasNext) { var row = text.next if (row.contains( "imei" )) log + = row } //println(log.head) //println(log.length) var act = log.filter{ (row) = > row.contains( "active" ) } var imei = act.map( _ .split( "," ).filter( _ .contains( "imei" ))).map( _ .mkString( "," )) var clean = imei.map( _ .replaceAll( """ , "" )).map( _ .replaceAll( " " , "" )) println( "total:" + clean.length) var count = clean.toList.distinct.length println( "distinct:" + count) |
异常
➜ scala scala distinct_imei.scala
java.lang.OutOfMemoryError: Java heap space
at java.util.Arrays.copyOfRange(Arrays.java:3664)
at java.lang.String.(String.java:201)
at java.io.BufferedReader.readLine(BufferedReader.java:356)
at java.io.BufferedReader.readLine(BufferedReader.java:389)
at scala.io.BufferedSource$BufferedLineIterator.hasNext(BufferedSource.scala:72)
at Main$$anon$1.(distinct_imei.scala:7)
at Main$.main(distinct_imei.scala:1)
at Main.main(distinct_imei.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:483)
at scala.reflect.internal.util.ScalaClassLoader$$anonfun$run$1.apply(ScalaClassLoader.scala:70)
at scala.reflect.internal.util.ScalaClassLoader$class.asContext(ScalaClassLoader.scala:31)
at scala.reflect.internal.util.ScalaClassLoader$URLClassLoader.asContext(ScalaClassLoader.scala:101)
at scala.reflect.internal.util.ScalaClassLoader$class.run(ScalaClassLoader.scala:70)
at scala.reflect.internal.util.ScalaClassLoader$URLClassLoader.run(ScalaClassLoader.scala:101)
at scala.tools.nsc.CommonRunner$class.run(ObjectRunner.scala:22)
at scala.tools.nsc.ObjectRunner$.run(ObjectRunner.scala:39)
at scala.tools.nsc.CommonRunner$class.runAndCatch(ObjectRunner.scala:29)
at scala.tools.nsc.ObjectRunner$.runAndCatch(ObjectRunner.scala:39)
at scala.tools.nsc.ScriptRunner.scala$tools$nsc$ScriptRunner$$runCompiled(ScriptRunner.scala:170)
at scala.tools.nsc.ScriptRunner$$anonfun$runScript$1.apply(ScriptRunner.scala:187)
at scala.tools.nsc.ScriptRunner$$anonfun$runScript$1.apply(ScriptRunner.scala:187)
at scala.tools.nsc.ScriptRunner$$anonfun$withCompiledScript$1$$anonfun$apply$mcZ$sp$1.apply(ScriptRunner.scala:156)
at scala.tools.nsc.ScriptRunner$$anonfun$withCompiledScript$1.apply$mcZ$sp(ScriptRunner.scala:156)
at scala.tools.nsc.ScriptRunner$$anonfun$withCompiledScript$1.apply(ScriptRunner.scala:124)
at scala.tools.nsc.ScriptRunner$$anonfun$withCompiledScript$1.apply(ScriptRunner.scala:124)
at scala.tools.nsc.util.package$.trackingThreads(package.scala:43)
at scala.tools.nsc.util.package$.waitingForThreads(package.scala:27)
at scala.tools.nsc.ScriptRunner.withCompiledScript(ScriptRunner.scala:123)
at scala.tools.nsc.ScriptRunner.runScript(ScriptRunner.scala:187)
解决办法:
1
|
env JAVA_OPTS= "-Xms256m -Xmx2048m" scala distinct_imei.scala |
使用scala -help可以看到有一个-J的参数,也可以实现相同的效果
1
|
scala -J-Xms256m -J-Xmx2048m distinct_imei.scala |
via:http://alvinalexander.com/scala/scala-repl-java.lang.outofmemoryerror-java-heap-space-error
http://www.4wei.cn/archives/1002410