Notes
-----
The underlying C implementation uses a random number generator to
select features when fitting the model. It is thus not uncommon to
have slightly different results for the same input data. If that
happens, try with a smaller tol parameter.

Predict output may not match that of standalone liblinear in certain
cases. See :ref:`differences from liblinear <liblinear_differences>`.
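
For example, a tighter stopping tolerance than the default
``tol=1e-4`` can be requested with::

    LogisticRegression(tol=1e-5)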
References
----------
L-BFGS-B -- Software for Large-scale Bound-constrained Optimization
    Ciyou Zhu, Richard Byrd, Jorge Nocedal and Jose Luis Morales.
    http://users.iems.northwestern.edu/~nocedal/lbfgsb.html

LIBLINEAR -- A Library for Large Linear Classification
    https://www.csie.ntu.edu.tw/~cjlin/liblinear/

SAG -- Mark Schmidt, Nicolas Le Roux, and Francis Bach
    Minimizing Finite Sums with the Stochastic Average Gradient
    https://hal.inria.fr/hal-00860051/document

SAGA -- Defazio, A., Bach F. & Lacoste-Julien S. (2014).
    SAGA: A Fast Incremental Gradient Method With Support
    for Non-Strongly Convex Composite Objectives
    https://arxiv.org/abs/1407.0202

Hsiang-Fu Yu, Fang-Lan Huang, Chih-Jen Lin (2011). Dual coordinate descent
    methods for logistic regression and maximum entropy models.
    Machine Learning 85(1-2):41-75.
    https://www.csie.ntu.edu.tw/~cjlin/papers/maxent_dual.pdf
Examples
--------
>>> from sklearn.datasets import load_iris
>>> from sklearn.linear_model import LogisticRegression
>>> X, y = load_iris(return_X_y=True)
>>> clf = LogisticRegression(random_state=0).fit(X, y)
>>> clf.predict(X[:2, :])
array([0, 0])
>>> clf.predict_proba(X[:2, :])
array([[9.8...e-01, 1.8...e-02, 1.4...e-08],
[9.7...e-01, 2.8...e-02, ...e-08]])
>>> clf.score(X, y)
0.97...
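
The fitted coefficients have one row per class and one column per
feature of the iris data:

>>> clf.coef_.shape
(3, 4)
>>> clf.intercept_.shape
(3,)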
"""
@_deprecate_positional_args
def __init__(self, penalty='l2', *, dual=False, tol=1e-4, C=1.0,
fit_intercept=True, intercept_scaling=1, class_weight=None,
random_state=None, solver='lbfgs', max_iter=100,
multi_class='auto', verbose=0, warm_start=False, n_jobs=None,
l1_ratio=None):
self.penalty = penalty
self.dual = dual
self.tol = tol
self.C = C
self.fit_intercept = fit_intercept
self.intercept_scaling = intercept_scaling
self.class_weight = class_weight
self.random_state = random_state
self.solver = solver
self.max_iter = max_iter
self.multi_class = multi_class
self.verbose = verbose
self.warm_start = warm_start
self.n_jobs = n_jobs
self.l1_ratio = l1_ratio
def fit(self, X, y, sample_weight=None):
"""
Fit the model according to the given training data.
Parameters
----------
X : {array-like, sparse matrix} of shape (n_samples, n_features)
Training vector, where n_samples is the number of samples and
n_features is the number of features.
y : array-like of shape (n_samples,)
Target vector relative to X.
sample_weight : array-like of shape (n_samples,), default=None
Array of weights that are assigned to individual samples.
If not provided, then each sample is given unit weight.
.. versionadded:: 0.17
*sample_weight* support to LogisticRegression.
Returns
-------
self
Fitted estimator.
Notes
-----
The SAGA solver supports both float64 and float32 bit arrays.
"""
solver = _check_solver(self.solver, self.penalty, self.dual)
if not isinstance(self.C, numbers.Number) or self.C < 0:
raise ValueError(
"Penalty term must be positive; got (C=%r)" % self.C)
if self.penalty == 'elasticnet':
if (not isinstance(self.l1_ratio, numbers.Number) or
self.l1_ratio < 0 or self.l1_ratio > 1):
raise ValueError(
"l1_ratio must be between 0 and 1;"
" got (l1_ratio=%r)" %
self.l1_ratio)
elif self.l1_ratio is not None:
warnings.warn("l1_ratio parameter is only used when penalty is "
"'elasticnet'. Got "
"(penalty={})".
format(self.penalty))
if self.penalty == 'none':
if self.C != 1.0: # default values
warnings.warn("Setting penalty='none' will ignore the C and
l1_ratio "
"parameters")
# Note that check for l1_ratio is done right above
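# C is the inverse of the regularization strength, so an infinite C
# together with the placeholder 'l2' penalty makes the penalty vanish.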
C_ = np.inf
penalty = 'l2'
else:
C_ = self.C
penalty = self.penalty
if not isinstance(self.max_iter, numbers.Number) or self.max_iter < 0:
raise ValueError("Maximum number of iteration must be positive;"
" got (max_iter=%r)" %
self.max_iter)
if not isinstance(self.tol, numbers.Number) or self.tol < 0:
raise ValueError("Tolerance for stopping criteria must be "
"positive; got (tol=%r)" %
self.tol)
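# lbfgs runs in float64 only; the remaining solvers also accept
# float32 input, avoiding an upcast copy.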
if solver == 'lbfgs':
_dtype = np.float64
else:
_dtype = [np.float64, np.float32]
X, y = self._validate_data(X, y, accept_sparse='csr', dtype=_dtype,
order="C",
accept_large_sparse=solver != 'liblinear')
check_classification_targets(y)
self.classes_ = np.unique(y)
multi_class = _check_multi_class(self.multi_class, solver,
len(self.classes_))
if solver == 'liblinear':
if effective_n_jobs(self.n_jobs) != 1:
warnings.warn("'n_jobs' > 1 does not have any effect when"
" 'solver' is set to 'liblinear'. Got 'n_jobs'"
" = {}.".
format(effective_n_jobs(self.n_jobs)))
self.coef_, self.intercept_, n_iter_ = _fit_liblinear(
    X, y, self.C, self.fit_intercept, self.intercept_scaling,
    self.class_weight, self.penalty, self.dual, self.verbose,
    self.max_iter, self.tol, self.random_state,
    sample_weight=sample_weight)
self.n_iter_ = np.array([n_iter_])
return self
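# SAG/SAGA set their step size from the maximum squared row norm of X,
# so it is precomputed once here.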
if solver in ['sag', 'saga']:
max_squared_sum = row_norms(X, squared=True).max()
else:
max_squared_sum = None
n_classes = len(self.classes_)
classes_ = self.classes_
if n_classes < 2:
raise ValueError(
"This solver needs samples of at least 2 classes"
" in the data, but the data contains only one"
" class: %r" %
classes_[0])
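# For binary problems only one one-vs-rest subproblem is solved, for
# classes_[1], so coef_ ends up with a single row.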
if len(self.classes_) == 2:
n_classes = 1
classes_ = classes_[1:]
if self.warm_start:
warm_start_coef = getattr(self, 'coef_', None)
else:
warm_start_coef = None
if warm_start_coef is not None and self.fit_intercept:
    warm_start_coef = np.append(warm_start_coef,
                                self.intercept_[:, np.newaxis],
                                axis=1)
self.coef_ = list()
self.intercept_ = np.zeros(n_classes)
# Hack so that we iterate only once for the multinomial case.
if multi_class == 'multinomial':
classes_ = [None]
warm_start_coef = [warm_start_coef]
if warm_start_coef is None:
warm_start_coef = [None] * n_classes
path_func = delayed(_logistic_regression_path)
# The SAG solver releases the GIL so it's more efficient to use
# threads for this solver.
if solver in ['sag', 'saga']:
prefer = 'threads'
else:
prefer = 'processes'
fold_coefs_ = Parallel(n_jobs=self.n_jobs, verbose=self.verbose,
                       **_joblib_parallel_args(prefer=prefer))(
    path_func(X, y, pos_class=class_, Cs=[C_],
              l1_ratio=self.l1_ratio, fit_intercept=self.fit_intercept,
              tol=self.tol, verbose=self.verbose, solver=solver,
              multi_class=multi_class, max_iter=self.max_iter,
              class_weight=self.class_weight, check_input=False,
              random_state=self.random_state, coef=warm_start_coef_,
              penalty=penalty, max_squared_sum=max_squared_sum,
              sample_weight=sample_weight)
    for class_, warm_start_coef_ in zip(classes_, warm_start_coef))
fold_coefs_, _, n_iter_ = zip(*fold_coefs_)
self.n_iter_ = np.asarray(n_iter_, dtype=np.int32)[:, 0]
n_features = X.shape[1]
if multi_class == 'multinomial':
self.coef_ = fold_coefs_[0][0]
else:
self.coef_ = np.asarray(fold_coefs_)
self.coef_ = self.coef_.reshape(n_classes, n_features +
int(self.fit_intercept))
if self.fit_intercept:
    self.intercept_ = self.coef_[:, -1]
    self.coef_ = self.coef_[:, :-1]
return self
def predict_proba(self, X):
"""
Probability estimates.
The returned estimates for all classes are ordered by the label of
classes. For a multi_class problem, if multi_class is set to
"multinomial" the softmax function is used to find the predicted
probability of each class.
Otherwise a one-vs-rest approach is used, i.e. the probability of
each class is calculated assuming it to be positive, using the
logistic function, and these values are normalized across all the
classes.
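
Writing the decision value of class :math:`k` as :math:`z_k`, the
multinomial case computes

.. math:: p_k = \frac{\exp(z_k)}{\sum_j \exp(z_j)},

and the one-vs-rest case computes
:math:`p_k = \sigma(z_k) / \sum_j \sigma(z_j)`,
where :math:`\sigma` is the logistic sigmoid.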
Parameters
----------
X : array-like of shape (n_samples, n_features)
Vector to be scored, where `n_samples` is the number of samples and `n_features` is the number of features.
Returns
-------
T : array-like of shape (n_samples, n_classes)
Returns the probability of the sample for each class in the model, where classes are ordered as they are in ``self.classes_``.
"""
check_is_fitted(self)
ovr = (self.multi_class in ["ovr", "warn"] or
       (self.multi_class == 'auto' and
        (self.classes_.size <= 2 or self.solver == 'liblinear')))
if ovr:
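    # _predict_proba_lr applies the logistic sigmoid to the decision
    # values and, with more than two classes, normalizes each row.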
return super()._predict_proba_lr(X)
else:
decision = self.decision_function(X)
if decision.ndim == 1:
    # Workaround for multi_class="multinomial" and binary outcomes
    # which requires softmax prediction with only a 1D decision.
    decision_2d = np.c_[-decision, decision]
else:
decision_2d = decision
return softmax(decision_2d, copy=False)
def predict_log_proba(self, X):
"""
Predict logarithm of probability estimates.
The returned estimates for all classes are ordered by the label of classes.
Parameters
----------
X : array-like of shape (n_samples, n_features)
Vector to be scored, where `n_samples` is the number of samples and `n_features` is the number of features.
Returns
-------
T : array-like of shape (n_samples, n_classes)
Returns the log-probability of the sample for each class in the model, where classes are ordered as they are in ``self.classes_``.
"""
return np.log(self.predict_proba(X))