Skip to content

Commit

Permalink
part-5: persistence to disk
Browse files Browse the repository at this point in the history
  • Loading branch information
sinkinben committed Oct 10, 2021
1 parent 22c4c53 commit 217edde
Show file tree
Hide file tree
Showing 9 changed files with 230 additions and 52 deletions.
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,6 @@
.o
.bundle
bin/
test.c
test.c
tinydb
mydb.db
2 changes: 1 addition & 1 deletion common.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ meta_command_result_t do_meta_command(buffer_t *input, table_t *table)
{
if (strcmp(input->buffer, ".exit") == 0)
{
free_table(table);
db_close(table);
exit(EXIT_SUCCESS);
}
else
Expand Down
8 changes: 7 additions & 1 deletion main.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,13 @@
int main(int argc, char *argv[])
{
buffer_t *input_buffer = new_buffer_input();
table_t *table = new_table();
if (argc < 2)
{
printf("Must supply a database filename.\n");
exit(EXIT_FAILURE);
}
char *filename = argv[1];
table_t *table = db_open(filename);
while (true)
{
print_prompt();
Expand Down
2 changes: 1 addition & 1 deletion makefile
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
run:
gcc main.c -o tinydb
./tinydb
./tinydb mydb.db
test:
gcc test.c -o a.out
./a.out
94 changes: 94 additions & 0 deletions pager.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
#include <stdint.h>
#include <sys/fcntl.h>
#include <unistd.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include "types.h"
#ifndef PAGER_H
#define PAGER_H

/*
 * Open the database file (creating it when missing) and build a pager for it.
 *
 * filename: path of the database file. A new file is created with user
 *           read/write permission.
 *
 * Returns a heap-allocated pager whose page cache is empty (every slot is
 * NULL) and whose file_length holds the current size of the file in bytes.
 * The caller owns the returned pager. Any failure is reported on stdout and
 * terminates the process.
 */
pager_t *pager_open(const char *filename)
{
    // S_IWUSR: user write permission, S_IRUSR: user read permission
    int fd = open(filename, O_RDWR | O_CREAT, S_IWUSR | S_IRUSR);
    if (fd < 0)
    {
        printf("Unable to open file\n");
        exit(EXIT_FAILURE);
    }

    // Seeking to the end yields the file size; -1 signals failure and must
    // not be stored as an (unsigned) length.
    off_t file_length = lseek(fd, 0, SEEK_END);
    if (file_length == -1)
    {
        printf("Error seeking: %d\n", errno);
        exit(EXIT_FAILURE);
    }

    pager_t *pager = malloc(sizeof *pager);
    if (pager == NULL)
    {
        printf("Unable to allocate pager\n");
        exit(EXIT_FAILURE);
    }

    pager->file_descriptor = fd;
    pager->file_length = (uint32_t)file_length;

    // Nothing is cached yet; pages are loaded lazily by get_page().
    for (uint32_t i = 0; i < TABLE_MAX_PAGES; i++)
    {
        pager->pages[i] = NULL;
    }
    return pager;
}

/*
 * Return the in-memory buffer for page `page_num`, loading it on first use.
 *
 * pager:    pager that owns the file descriptor and the page cache.
 * page_num: zero-based page index; must be below TABLE_MAX_PAGES.
 *
 * On a cache miss a PAGE_SIZE buffer is allocated; if the page lies within
 * the file its bytes are read from disk, otherwise the buffer stays
 * zero-filled. The buffer is stored in pager->pages and remains owned by the
 * pager. Any failure is reported on stdout and terminates the process.
 */
void *get_page(pager_t *pager, uint32_t page_num)
{
    // Valid cache slots are 0 .. TABLE_MAX_PAGES - 1, so the limit itself is
    // already out of bounds.
    if (page_num >= TABLE_MAX_PAGES)
    {
        printf("Tried to fetch page number out of bounds. %u >= %u\n",
               (unsigned)page_num, (unsigned)TABLE_MAX_PAGES);
        exit(EXIT_FAILURE);
    }

    // Cache miss. Allocate memory and load from file.
    if (pager->pages[page_num] == NULL)
    {
        // Zero-filled so that bytes not backed by the file (and the padding
        // later flushed with full pages) are deterministic, not heap garbage.
        void *page = calloc(1, PAGE_SIZE);
        if (page == NULL)
        {
            printf("Unable to allocate page\n");
            exit(EXIT_FAILURE);
        }

        // Number of pages present in the file, counting a trailing partial one.
        uint32_t num_pages = pager->file_length / PAGE_SIZE;
        if (pager->file_length % PAGE_SIZE)
        {
            num_pages += 1;
        }

        // Only pages that exist on disk need to be read; a page at or past
        // num_pages starts at or beyond end-of-file.
        if (page_num < num_pages)
        {
            off_t offset = lseek(pager->file_descriptor, (off_t)page_num * PAGE_SIZE, SEEK_SET);
            if (offset == -1)
            {
                printf("Error seeking: %d\n", errno);
                exit(EXIT_FAILURE);
            }

            // A short read is expected for the trailing partial page.
            ssize_t bytes_read = read(pager->file_descriptor, page, PAGE_SIZE);
            if (bytes_read == -1)
            {
                printf("Error reading file: %d\n", errno);
                exit(EXIT_FAILURE);
            }
        }
        pager->pages[page_num] = page;
    }
    return pager->pages[page_num];
}

/*
 * Write the first `size` bytes of cached page `page_num` to the file.
 *
 * pager:    pager that owns the file descriptor and the page cache.
 * page_num: zero-based page index; must be below TABLE_MAX_PAGES and cached.
 * size:     number of bytes to write, at most PAGE_SIZE.
 *
 * The data is written at file offset page_num * PAGE_SIZE. Short writes are
 * continued until every byte is on its way to disk. Any failure is reported
 * on stdout and terminates the process.
 */
void pager_flush(pager_t *pager, uint32_t page_num, uint32_t size)
{
    if (page_num >= TABLE_MAX_PAGES)
    {
        printf("Tried to flush page number out of bounds. %u >= %u\n",
               (unsigned)page_num, (unsigned)TABLE_MAX_PAGES);
        exit(EXIT_FAILURE);
    }

    if (pager->pages[page_num] == NULL)
    {
        printf("Tried to flush null page\n");
        exit(EXIT_FAILURE);
    }

    // Page buffers are PAGE_SIZE bytes; writing more would read past the end.
    if (size > PAGE_SIZE)
    {
        printf("Tried to flush more than one page. %u > %u\n",
               (unsigned)size, (unsigned)PAGE_SIZE);
        exit(EXIT_FAILURE);
    }

    off_t offset = lseek(pager->file_descriptor, (off_t)page_num * PAGE_SIZE, SEEK_SET);
    if (offset == -1)
    {
        printf("Error seeking: %d\n", errno);
        exit(EXIT_FAILURE);
    }

    // write() may transfer fewer bytes than requested, so keep going until
    // the whole range has been written.
    const char *cursor = pager->pages[page_num];
    uint32_t remaining = size;
    while (remaining > 0)
    {
        ssize_t bytes_written = write(pager->file_descriptor, cursor, remaining);
        if (bytes_written == -1)
        {
            if (errno == EINTR)
            {
                continue; // interrupted before any data was written; retry
            }
            printf("Error writing: %d\n", errno);
            exit(EXIT_FAILURE);
        }
        cursor += bytes_written;
        remaining -= (uint32_t)bytes_written;
    }
}

#endif
26 changes: 25 additions & 1 deletion spec/main_spec.rb
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
describe 'database' do
before do
`rm mydb.db`
end
def run_script(commands)
raw_output = nil
IO.popen("./tinydb", "r+") do |pipe|
IO.popen("./tinydb mydb.db", "r+") do |pipe|
commands.each do |command|
pipe.puts command
end
Expand Down Expand Up @@ -69,4 +72,25 @@ def run_script(commands)
"tinydb > ",
])
end


it 'keeps data after closing connection' do
result1 = run_script([
"insert 1 user1 person1@example.com",
".exit",
])
expect(result1).to match_array([
"tinydb > Executed.",
"tinydb > ",
])
result2 = run_script([
"select",
".exit",
])
expect(result2).to match_array([
"tinydb > (1, user1, person1@example.com)",
"Executed.",
"tinydb > ",
])
end
end
102 changes: 56 additions & 46 deletions table.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,29 +2,11 @@
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include "types.h"
#include "pager.h"
#ifndef TABLE_H
#define TABLE_H

#define COLUMN_USERNAME_SIZE 32
#define COLUMN_EMAIL_SIZE 255
typedef struct
{
uint32_t id;
char username[COLUMN_USERNAME_SIZE + 1];
char email[COLUMN_EMAIL_SIZE + 1];
} row_t;

#define size_of_attribute(Struct, Attribute) sizeof(((Struct *)0)->Attribute)
const uint32_t ID_SIZE = size_of_attribute(row_t, id);
const uint32_t USERNAME_SIZE = size_of_attribute(row_t, username);
const uint32_t EMAIL_SIZE = size_of_attribute(row_t, email);
const uint32_t ROW_SIZE = ID_SIZE + USERNAME_SIZE + EMAIL_SIZE;

#define offset_of_attribute(Struct, Attribute) ((uint32_t)(&((Struct *)0)->Attribute))
const uint32_t ID_OFFSET = offset_of_attribute(row_t, id);
const uint32_t USERNAME_OFFSET = offset_of_attribute(row_t, username);
const uint32_t EMAIL_OFFSET = offset_of_attribute(row_t, email);

void serialize_row(row_t *source, void *dest)
{
memcpy(dest + ID_OFFSET, &(source->id), ID_SIZE);
Expand All @@ -39,27 +21,13 @@ void deserialize_row(void *source, row_t *dest)
memcpy(&(dest->email), source + EMAIL_OFFSET, EMAIL_SIZE);
}

const uint32_t TABLE_MAX_PAGES = 100;

const uint32_t PAGE_SIZE = 4096; // 4KB
const uint32_t ROWS_PER_PAGE = PAGE_SIZE / ROW_SIZE;
const uint32_t TABLE_MAX_ROWS = TABLE_MAX_PAGES * ROWS_PER_PAGE;

// 暂且不用 B+ 树, 使用数组的形式作为存储结构
typedef struct
{
uint32_t num_rows;
void *pages[TABLE_MAX_PAGES];
} table_t;

void *get_row_slot(table_t *table, uint32_t row_num)
{
uint32_t page_num = row_num / ROWS_PER_PAGE;
void *page = table->pages[page_num];
if (page == NULL)
{
page = table->pages[page_num] = malloc(PAGE_SIZE);
}
// pointer to the corresponding page of row
void *page = get_page(table->pager, page_num);
// offset in a page
uint32_t row_offset = row_num % ROWS_PER_PAGE;
uint32_t bytes_offset = row_offset * ROW_SIZE;
return page + bytes_offset;
Expand All @@ -70,24 +38,66 @@ void print_row(row_t *row)
printf("(%d, %s, %s)\n", row->id, row->username, row->email);
}

table_t *new_table()
// opening the database file
// initializing a pager data structure
// initializing a table data structure
table_t *db_open(const char *filename)
{
pager_t *pager = pager_open(filename);
table_t *table = (table_t *)malloc(sizeof(table_t));
table->num_rows = 0;
for (uint32_t i = 0; i < TABLE_MAX_PAGES; i++)
{
table->pages[i] = NULL;
}
uint32_t num_rows = pager->file_length / ROW_SIZE;
table->num_rows = num_rows;
table->pager = pager;
return table;
}

void free_table(table_t *table)
/*
 * Close a database opened with db_open().
 *
 * table: table handle to close; it and its pager are freed and must not be
 *        used afterwards.
 *
 * Flushes every cached page that holds rows to disk, closes the database
 * file, then frees all cached pages, the pager and the table. A failure to
 * close the file is reported on stdout and terminates the process.
 */
void db_close(table_t *table)
{
    pager_t *pager = table->pager;

    // Pages that are completely filled with rows. The divisor is the number
    // of rows per page, not the page size in bytes.
    uint32_t num_full_pages = table->num_rows / ROWS_PER_PAGE;

    for (uint32_t i = 0; i < num_full_pages; i++)
    {
        if (pager->pages[i] == NULL)
        {
            continue; // never loaded, so nothing to write back
        }
        pager_flush(pager, i, PAGE_SIZE);
        free(pager->pages[i]);
        pager->pages[i] = NULL;
    }

    // There may be a partial page to write to the end of the file.
    // This should not be needed after we switch to a B-tree.
    uint32_t num_additional_rows = table->num_rows % ROWS_PER_PAGE;
    if (num_additional_rows > 0)
    {
        uint32_t page_num = num_full_pages;
        if (pager->pages[page_num] != NULL)
        {
            // Write only the bytes occupied by rows, not the whole page.
            pager_flush(pager, page_num, num_additional_rows * ROW_SIZE);
            free(pager->pages[page_num]);
            pager->pages[page_num] = NULL;
        }
    }

    int ret = close(pager->file_descriptor);
    if (ret < 0)
    {
        printf("Error closing db file.\n");
        exit(EXIT_FAILURE);
    }

    // Release any page that was cached but not flushed above.
    for (uint32_t i = 0; i < TABLE_MAX_PAGES; i++)
    {
        free(pager->pages[i]); // free(NULL) is a no-op
        pager->pages[i] = NULL;
    }
    free(pager);
    free(table);
}

Expand Down
Binary file removed tinydb
Binary file not shown.
44 changes: 43 additions & 1 deletion types.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
#include <stdbool.h>
#ifndef TYPES_H
#define TYPES_H

Expand Down Expand Up @@ -29,6 +28,27 @@ typedef enum
STATEMENT_SELECT
} statement_type_t;

// A single row of the table
#define COLUMN_USERNAME_SIZE 32
#define COLUMN_EMAIL_SIZE 255
// In-memory representation of one table row.
// The string fields are one byte larger than their column size; presumably
// the extra byte holds the terminating NUL (print_row prints them with %s).
typedef struct
{
uint32_t id; // numeric row id
char username[COLUMN_USERNAME_SIZE + 1]; // user name text
char email[COLUMN_EMAIL_SIZE + 1]; // e-mail address text
} row_t;

// Size in bytes of a struct member, computed without needing an instance.
#define size_of_attribute(Struct, Attribute) sizeof(((Struct *)0)->Attribute)
// Sizes of the individual row_t fields and their sum, which is the number of
// bytes one row occupies when copied field by field.
const uint32_t ID_SIZE = size_of_attribute(row_t, id);
const uint32_t USERNAME_SIZE = size_of_attribute(row_t, username);
const uint32_t EMAIL_SIZE = size_of_attribute(row_t, email);
const uint32_t ROW_SIZE = ID_SIZE + USERNAME_SIZE + EMAIL_SIZE;

// Byte offset of a struct member from the start of the struct.
// NOTE(review): this casts a pointer to uint32_t, which narrows on 64-bit
// targets; the standard offsetof() from <stddef.h> looks like a drop-in
// replacement - confirm before changing.
#define offset_of_attribute(Struct, Attribute) ((uint32_t)(&((Struct *)0)->Attribute))
// Offsets of the row_t fields, used together with the sizes above.
const uint32_t ID_OFFSET = offset_of_attribute(row_t, id);
const uint32_t USERNAME_OFFSET = offset_of_attribute(row_t, username);
const uint32_t EMAIL_OFFSET = offset_of_attribute(row_t, email);

// insert, select 等命令的解析结果
// 相当于一个简易的 sql-parser
typedef struct
Expand All @@ -44,4 +64,26 @@ typedef enum
EXECUTE_TABLE_FULL
} execute_result_t;

// Upper bound on the number of pages a table may use; also the length of the
// pager's page cache array.
const uint32_t TABLE_MAX_PAGES = 100;

// Size of one page in bytes.
const uint32_t PAGE_SIZE = 4096; // 4KB
// Whole rows that fit in one page (integer division, so any remainder of the
// page is unused).
const uint32_t ROWS_PER_PAGE = PAGE_SIZE / ROW_SIZE;
// Maximum number of rows the table can hold.
const uint32_t TABLE_MAX_ROWS = TABLE_MAX_PAGES * ROWS_PER_PAGE;

// Page cache in front of the database file.
typedef struct
{
int file_descriptor; // descriptor returned by open() in pager_open
uint32_t file_length; // file size in bytes, measured when the file was opened
void *pages[TABLE_MAX_PAGES]; // cached page buffers; NULL until loaded by get_page
} pager_t;

// For now the table does not use a B+ tree; rows are stored in an
// array-like layout, one after another inside pages.
typedef struct
{
uint32_t num_rows; // number of rows in the table; db_open derives it from the file length
pager_t *pager; // pager that provides the table's pages
} table_t;



#endif

0 comments on commit 217edde

Please sign in to comment.