2016-12-19 16 views
1

Я использую GridSearchCV для поиска лучших гиперпараметров классификатора, как описано здесь: http://scikit-learn.org/stable/auto_examples/model_selection/plot_nested_cross_validation_iris.html

Python GridSearchCV: индекс ххххх находится вне границ для размера хххххх

Вот как выглядит соответствующий фрагмент кода:

# Nested cross-validation attempt: an inner GridSearchCV (hyper-parameter
# search) is scored by an outer cross_val_score. `rnd`, `kFold`, `pipeline`,
# `param_dict` and `scores` are defined outside the lines shown here.
X = X.values # convert from pandas Dataframe to numpy array 
    y = np.array(y) 
    # n_samples, n_features and n_outputs are not used in the lines shown here.
    n_samples, n_features = X.shape 
    n_outputs = y.shape[0] 

    # Both fold generators are constructed from the FULL label vector y.
    # NOTE(review): the traceback quoted below fails inside GridSearchCV.fit
    # with "index 4549 is out of bounds for size 4549" while X has 6066 rows.
    # Presumably inner_cv's indices (computed over all of y) are being applied
    # to the smaller training subset produced by outer_cv -- confirm.
    inner_cv = cross_validation.StratifiedKFold(y, n_folds=4, shuffle=True, random_state=rnd) 
    outer_cv = cross_validation.StratifiedKFold(y, n_folds=kFold, shuffle=True, random_state=rnd) 

    # Non_nested parameter search and scoring 
    clf = GridSearchCV(estimator=pipeline, param_grid=param_dict, scoring= scores, cv=inner_cv) 

    # Nested CV with parameter optimization 
    # This is the call that raises the IndexError in the quoted traceback.
    nested_score = cross_validation.cross_val_score(clf, X=X, y=y, cv=outer_cv) 
    # NOTE(review): per the quoted traceback, cross_val_score returns
    # `np.array(scores)[:, 0]`; an array has no `fit` method, so the next line
    # looks like a leftover and would fail once the call above succeeds -- confirm.
    nested_score.fit(X,y) 
    # Mean of the per-fold outer scores.
    nested_scores = nested_score.mean() 

Однако по какой-то причине я получаю эту ошибку:

--------------------------------------------------------------------------- 
IndexError        Traceback (most recent call last) 
<ipython-input-1-cad4e848fb54> in <module>() 
    124 
    125    # Nested CV with parameter optimization 
--> 126    nested_score = cross_validation.cross_val_score(clf, X=X, y=y, cv=outer_cv) 
    127    nested_score.fit(X,y) 
    128    nested_scores = nested_score.mean() 

C:\Users\Yas\Anaconda3\lib\site-packages\sklearn\cross_validation.py in cross_val_score(estimator, X, y, scoring, cv, n_jobs, verbose, fit_params, pre_dispatch) 
    1431            train, test, verbose, None, 
    1432            fit_params) 
-> 1433      for train, test in cv) 
    1434  return np.array(scores)[:, 0] 
    1435 

C:\Users\Yas\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self, iterable) 
    798    # was dispatched. In particular this covers the edge 
    799    # case of Parallel used with an exhausted iterator. 
--> 800    while self.dispatch_one_batch(iterator): 
    801     self._iterating = True 
    802    else: 

C:\Users\Yas\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in dispatch_one_batch(self, iterator) 
    656     return False 
    657    else: 
--> 658     self._dispatch(tasks) 
    659     return True 
    660 

C:\Users\Yas\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in _dispatch(self, batch) 
    564 
    565   if self._pool is None: 
--> 566    job = ImmediateComputeBatch(batch) 
    567    self._jobs.append(job) 
    568    self.n_dispatched_batches += 1 

C:\Users\Yas\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in __init__(self, batch) 
    178   # Don't delay the application, to avoid keeping the input 
    179   # arguments in memory 
--> 180   self.results = batch() 
    181 
    182  def get(self): 

C:\Users\Yas\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self) 
    70 
    71  def __call__(self): 
---> 72   return [func(*args, **kwargs) for func, args, kwargs in self.items] 
    73 
    74  def __len__(self): 

C:\Users\Yas\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in <listcomp>(.0) 
    70 
    71  def __call__(self): 
---> 72   return [func(*args, **kwargs) for func, args, kwargs in self.items] 
    73 
    74  def __len__(self): 

C:\Users\Yas\Anaconda3\lib\site-packages\sklearn\cross_validation.py in _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters, fit_params, return_train_score, return_parameters, error_score) 
    1529    estimator.fit(X_train, **fit_params) 
    1530   else: 
-> 1531    estimator.fit(X_train, y_train, **fit_params) 
    1532 
    1533  except Exception as e: 

C:\Users\Yas\Anaconda3\lib\site-packages\sklearn\grid_search.py in fit(self, X, y) 
    802 
    803   """ 
--> 804   return self._fit(X, y, ParameterGrid(self.param_grid)) 
    805 
    806 

C:\Users\Yas\Anaconda3\lib\site-packages\sklearn\grid_search.py in _fit(self, X, y, parameter_iterable) 
    551          self.fit_params, return_parameters=True, 
    552          error_score=self.error_score) 
--> 553     for parameters in parameter_iterable 
    554     for train, test in cv) 
    555 

C:\Users\Yas\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self, iterable) 
    798    # was dispatched. In particular this covers the edge 
    799    # case of Parallel used with an exhausted iterator. 
--> 800    while self.dispatch_one_batch(iterator): 
    801     self._iterating = True 
    802    else: 

C:\Users\Yas\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in dispatch_one_batch(self, iterator) 
    656     return False 
    657    else: 
--> 658     self._dispatch(tasks) 
    659     return True 
    660 

C:\Users\Yas\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in _dispatch(self, batch) 
    564 
    565   if self._pool is None: 
--> 566    job = ImmediateComputeBatch(batch) 
    567    self._jobs.append(job) 
    568    self.n_dispatched_batches += 1 

C:\Users\Yas\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in __init__(self, batch) 
    178   # Don't delay the application, to avoid keeping the input 
    179   # arguments in memory 
--> 180   self.results = batch() 
    181 
    182  def get(self): 

C:\Users\Yas\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self) 
    70 
    71  def __call__(self): 
---> 72   return [func(*args, **kwargs) for func, args, kwargs in self.items] 
    73 
    74  def __len__(self): 

C:\Users\Yas\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in <listcomp>(.0) 
    70 
    71  def __call__(self): 
---> 72   return [func(*args, **kwargs) for func, args, kwargs in self.items] 
    73 
    74  def __len__(self): 

C:\Users\Yas\Anaconda3\lib\site-packages\sklearn\cross_validation.py in _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters, fit_params, return_train_score, return_parameters, error_score) 
    1522  start_time = time.time() 
    1523 
-> 1524  X_train, y_train = _safe_split(estimator, X, y, train) 
    1525  X_test, y_test = _safe_split(estimator, X, y, test, train) 
    1526 

C:\Users\Yas\Anaconda3\lib\site-packages\sklearn\cross_validation.py in _safe_split(estimator, X, y, indices, train_indices) 
    1589     X_subset = X[np.ix_(indices, train_indices)] 
    1590   else: 
-> 1591    X_subset = safe_indexing(X, indices) 
    1592 
    1593  if y is not None: 

C:\Users\Yas\Anaconda3\lib\site-packages\sklearn\utils\__init__.py in safe_indexing(X, indices) 
    161         indices.dtype.kind == 'i'): 
    162    # This is often substantially faster than X[indices] 
--> 163    return X.take(indices, axis=0) 
    164   else: 
    165    return X[indices] 


    IndexError: index 4549 is out of bounds for size 4549 

X и y имеют следующие размеры:

X: (6066, 5) 
    y: (6066,) 

Всё выглядит нормально. Откуда возникает проблема?

Спасибо, что поделились своим мнением.

ответ

1

Не уверен, что именно вы пытаетесь здесь сделать, но GridSearchCV — не классификатор, и поэтому вы не можете передать его в cross_val_score.

GridSearchCV многократно выполняет перекрёстную проверку (cross-validation) с использованием разных параметров. Поэтому он представляет собой несколько классификаторов. После обучения (вызова fit) у него появляется атрибут best_estimator_.

 Смежные вопросы

  • Нет связанных вопросов^_^