Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Linear Regression function implemented #1005

Merged
merged 3 commits into from
Sep 2, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 25 additions & 1 deletion integration_tests/test_statistics.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
from statistics import (mean, fmean, geometric_mean, harmonic_mean, variance,
stdev, pvariance, pstdev, correlation, covariance)
stdev, pvariance, pstdev, correlation, covariance, linear_regression)
from ltypes import i32, f64, i64, f32

eps: f64
eps = 1e-12


def test_mean():
b: list[i32]
b = [9, 4, 10]
Expand Down Expand Up @@ -163,6 +164,28 @@ def test_correlation():
j = correlation(c, d)
assert abs(j - 0.9057925526720572) < eps

def test_linear_regression():
    """
    Check linear_regression against precomputed slope/intercept values
    for one pair of f64 lists and one pair of i32 lists.
    """
    # All typed locals are declared up front, then filled in below.
    xs_float: list[f64]
    ys_float: list[f64]
    xs_int: list[i32]
    ys_int: list[i32]
    slope: f64
    intercept: f64

    # Floating-point samples.
    xs_float = [2.74, 1.23, 2.63, 2.22, 3.0, 1.98]
    ys_float = [9.4, 1.23, 2.63, 22.4, 1.9, 13.98]
    slope, intercept = linear_regression(xs_float, ys_float)

    # The expected slope is negative, hence the addition inside abs().
    assert abs(slope + 0.6098133124816717) < eps
    assert abs(intercept - 9.992570618707845) < eps

    # Integer samples.
    xs_int = [12, 24, 2, 1, 43, 53, 23]
    ys_int = [2, 13, 14, 63, 49, 7, 3]
    slope, intercept = linear_regression(xs_int, ys_int)

    assert abs(slope + 0.18514007308160782) < eps
    assert abs(intercept - 25.750304506699152) < eps

def check():
test_mean()
Expand All @@ -173,6 +196,7 @@ def check():
test_stdev()
test_pvariance()
test_pstdev()
test_linear_regression()
test_correlation()
test_covariance()

Expand Down
78 changes: 77 additions & 1 deletion src/runtime/statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -322,7 +322,6 @@ def pstdev(x: list[i32]) -> f64:
"""
return pvariance(x)**0.5


@overload
def correlation(x: list[i32], y: list[i32]) -> f64:
"""
Expand Down Expand Up @@ -422,3 +421,80 @@ def covariance(x: list[f64], y: list[f64]) -> f64:
for i in range(n):
num += (x[i] - xmean) * (y[i] - ymean)
return num / (n-1)

@overload
def linear_regression(x: list[i32], y: list[i32]) -> tuple[f64, f64]:
    """
    Fit a straight line ``y = slope * x + intercept`` to integer samples
    by ordinary least squares and return ``(slope, intercept)``.

    Raises an Exception if ``x`` and ``y`` differ in length, if fewer than
    two data points are given, or if the sum of squared deviations of
    ``x`` from its mean is zero (reported as 'x is constant').
    """
    count: i32 = len(x)
    if len(y) != count:
        raise Exception('linear regression requires that both inputs have same number of data points')
    if count < 2:
        raise Exception('linear regression requires at least two data points')

    x_bar: f64 = mean(x)
    y_bar: f64 = mean(y)

    # Accumulate the cross-deviation sum and the squared x-deviation sum
    # in a single pass over the data.
    cross_dev: f64 = 0.0
    sq_dev: f64 = 0.0
    dx: f64
    k: i32
    for k in range(count):
        dx = x[k] - x_bar
        cross_dev += dx * (y[k] - y_bar)
        sq_dev += dx ** 2

    # A zero denominator would make the slope undefined.
    if sq_dev == 0:
        raise Exception('x is constant')

    slope: f64 = cross_dev / sq_dev
    intercept: f64 = y_bar - slope * x_bar

    result: tuple[f64, f64] = (slope, intercept)
    return result

@overload
def linear_regression(x: list[f64], y: list[f64]) -> tuple[f64, f64]:
    """
    Fit a straight line ``y = slope * x + intercept`` to floating-point
    samples by ordinary least squares and return ``(slope, intercept)``.

    Raises an Exception if ``x`` and ``y`` differ in length, if fewer than
    two data points are given, or if the sum of squared deviations of
    ``x`` from its mean is zero (reported as 'x is constant').
    """
    count: i32 = len(x)
    if len(y) != count:
        raise Exception('linear regression requires that both inputs have same number of data points')
    if count < 2:
        raise Exception('linear regression requires at least two data points')

    x_bar: f64 = mean(x)
    y_bar: f64 = mean(y)

    # Accumulate the cross-deviation sum and the squared x-deviation sum
    # in a single pass over the data.
    cross_dev: f64 = 0.0
    sq_dev: f64 = 0.0
    dx: f64
    k: i32
    for k in range(count):
        dx = x[k] - x_bar
        cross_dev += dx * (y[k] - y_bar)
        sq_dev += dx ** 2

    # A zero denominator would make the slope undefined.
    if sq_dev == 0:
        raise Exception('x is constant')

    slope: f64 = cross_dev / sq_dev
    intercept: f64 = y_bar - slope * x_bar

    result: tuple[f64, f64] = (slope, intercept)
    return result