Environment (2 Core CPU, 8 GB Ram)
Tables:
Student Teacher School Assessment
Create table teacher, school and student with (id, name)
Insert Teachers <- Took 1 second 15 ms
INSERT INTO teacher (id, name) VALUES (generate_series(1, 200000), 'default')
INSERT INTO student (id, name) VALUES (generate_series(1, 10000000), 'default')
INSERT INTO school (id, name) VALUES (generate_series(1, 50000), 'default')
CREATE TABLE assessment (
id serial,
submission_time timestamptz,
school_id int,
teacher_id int,
student_id int,
grade int,
is_passed bool,
FOREIGN KEY (school_id) REFERENCES school(id),
FOREIGN KEY (teacher_id) REFERENCES teacher(id),
FOREIGN KEY (student_id) REFERENCES student(id)
)
SELECT create_hypertable('assessment', 'submission_time')
INSERT INTO assessment
(id, submission_time, school_id, student_id, teacher_id, grade, is_passed)
VALUES
(generate_series(1, 10000), timestamptz '2023-07-01 00:00:00 UTC' + random() * INTERVAL '30 days', floor(random() * 50000) + 1, floor(random() * 10000000) + 1, floor(random() * 200000) + 1, floor(random() * 3) + 1, RANDOM()::INT::BOOLEAN);
INSERT INTO assessment
(id, submission_time, school_id, student_id, teacher_id, grade, is_passed)
VALUES
(generate_series(10001, 100000), timestamptz '2023-07-01 00:00:00 UTC' + random() * INTERVAL '30 days', floor(random() * 50000) + 1, floor(random() * 10000000) + 1, floor(random() * 200000) + 1, floor(random() * 3) + 1, RANDOM()::INT::BOOLEAN);
INSERT INTO assessment
(id, submission_time, school_id, student_id, teacher_id, grade, is_passed)
VALUES
(generate_series(100001, 200000), timestamptz '2023-07-01 00:00:00 UTC' + random() * INTERVAL '30 days', floor(random() * 50000) + 1, floor(random() * 10000000) + 1, floor(random() * 200000) + 1, floor(random() * 3) + 1, RANDOM()::INT::BOOLEAN);
INSERT INTO assessment
(id, submission_time, school_id, student_id, teacher_id, grade, is_passed)
VALUES
(generate_series(200001, 1000000), timestamptz '2023-07-01 00:00:00 UTC' + random() * INTERVAL '30 days', floor(random() * 50000) + 1, floor(random() * 10000000) + 1, floor(random() * 200000) + 1, floor(random() * 3) + 1, RANDOM()::INT::BOOLEAN);
explain analyze select count(*) from assessment where submission_time between '2023-07-03' and '2023-07-09';
Insert 40,00,000 randomised assessment <- Took ~ 6 min
explain analyze select count(*) from assessment where submission_time between '2023-07-03' and '2023-07-09';
explain analyze select count(*) from assessment_non_hypertable where submission_time between '2023-07-03' and '2023-07-09';
explain analyze select count(*) from assessment where submission_time between '2023-07-03' and '2023-07-09';
16.Query Count of all assessment for a particular week in a non-hypertable with exact same data
explain analyze select count(*) from assessment where submission_time between '2023-07-03' and '2023-07-09';
CREATE INDEX assessment_non_hypertable_submission_idx ON assessment_non_hypertable (submission_time)
We see that Non-hypertable is performing better as of now, Let’s try doubling the size of tables and see how it affects the result.