from sklearn import svm, datasets
from spark_sklearn import GridSearchCV
from pyspark import SparkContext, SparkConf
# Grid-search an SVM classifier on the iris dataset, using spark_sklearn's
# GridSearchCV with a local SparkContext.

# Load the iris dataset bundled with scikit-learn.
iris = datasets.load_iris()

# Parameter grid handed to GridSearchCV: two kernels and two values of C.
parameters = {'kernel': ('linear', 'rbf'), 'C': [1, 10]}
svr = svm.SVC(gamma='auto')

# Spark configuration: application name 'pysk', master set to 'local'.
conf = SparkConf().setAppName('pysk').setMaster('local')
# NOTE: the remark accompanying this script reports that the next line fails
# with "'JavaPackage' object is not callable" when the installed pyspark
# version is too new.
sc = SparkContext(conf=conf)
try:
    # spark_sklearn's GridSearchCV takes the SparkContext as its first argument.
    clf = GridSearchCV(sc, svr, parameters)
    clf.fit(iris.data, iris.target)
finally:
    # Stop the context whether or not the search succeeded, so it is not
    # left running after the script finishes or fails.
    sc.stop()
运行上述 spark-sklearn 示例时，在 `sc = SparkContext(conf=conf)` 这一行报错：`'JavaPackage' object is not callable`。经检查，该错误是由 pyspark 版本过高导致的。