Skip to content

Commit dd8c662

Browse files
authored
Add SAM assignment scraper (#51)
* Add node-html-parser * Add SAM scraper * Add documentation * Integrate SAM scraper into bot * Remove un-used variable professor
1 parent 81778e9 commit dd8c662

File tree

10 files changed

+153
-5
lines changed

10 files changed

+153
-5
lines changed

.config.example.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
{
22
"api": "https://liverpool.instructure.com/api/v1",
33
"course_filter": "^202021-(COMP[0-9]+)",
4+
"sam_course_filter": "^COMP1",
45
"prefix": "ca!",
56
"question_directory": "questions/",
67
"answer_directory": "answers/",

.env_SAMPLE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
BOT_TOKEN = YOUR_TOKEN_HERE
22
CANVAS_TOKEN = YOUR_TOKEN_HERE
3+
SAM_TOKEN = YOUR_TOKEN_HERE

package-lock.json

Lines changed: 13 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
"discord.js-light": "^3.3.6",
3535
"dotenv": "^8.2.0",
3636
"node-fetch": "^2.6.1",
37+
"node-html-parser": "^2.0.0",
3738
"simple-git": "^2.22.0",
3839
"sqlite3": "^5.0.0"
3940
},

src/canvas.js

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22

33
const fetch = require("node-fetch");
44

5+
const { USER_AGENT } = require('./constants.js');
6+
57
const PAGE_LENGTH = 50;
68

79
const RE_LINKURL = /<([^>]+)>/g;
@@ -13,6 +15,7 @@ const req = Object.freeze({
1315
'get': async function(url, auth, headers) {
1416
if (!headers) headers = {};
1517
headers['Authorization'] = `Bearer ${auth}`;
18+
if (!('User-Agent' in headers)) headers['User-Agent'] = USER_AGENT;
1619
console.log('GET', String(url));
1720
const res = await fetch(url, {headers});
1821
bucket.remaining = parseFloat(res.headers.get('X-Rate-Limit-Remaining'));

src/canvasutils.js

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,9 @@ function parseAssignments(week, assignments, courses) {
2020
}
2121

2222
class CanvasUtils {
23-
constructor(canvas, overrides) {
23+
constructor(canvas, sam, overrides) {
2424
this.canvas = canvas;
25+
this.sam = sam;
2526
this.overrides = overrides;
2627
this.lastUpdate = undefined;
2728
this.cache = undefined;
@@ -34,7 +35,14 @@ class CanvasUtils {
3435
promises.push(this.canvas.getCourseAssignments(courseID));
3536
promises.push(this.canvas.getCourseDiscussions(courseID));
3637
}
37-
const assignments = (await Promise.all(promises)).flat();
38+
let assignments = (await Promise.all(promises)).flat();
39+
const samCourses = await this.sam.getFilteredCourses();
40+
promises = [];
41+
for (let courseID of samCourses) {
42+
promises.push(this.sam.getCourseAssignments(courseID));
43+
courses[courseID] = courseID;
44+
}
45+
assignments = assignments.concat((await Promise.all(promises)).flat());
3846
return { courses, assignments };
3947
}
4048

src/constants.js

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,3 +12,16 @@ exports.BOT_PRESENCE = Object.freeze({ status: 'online', activity: { type: 'WATC
1212
exports.DB_NAME = '.store.db';
1313

1414
exports.EMBED_COLOR = 0xff0000;
15+
16+
exports.USER_AGENT = 'DiscordCanvasBot/1.0';
17+
18+
exports.OPTS_HTML_PARSE = Object.freeze({
19+
lowerCaseTagNames: false,
20+
comment: false,
21+
blockTextElements: {
22+
script: false,
23+
noscript: false,
24+
style: false,
25+
pre: true
26+
}
27+
});

src/index.js

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ const { Client } = require('discord.js-light');
55
const sqlite3 = require('sqlite3');
66

77
const Canvas = require('./canvas.js');
8+
const Sam = require('./sam.js');
89
const CanvasUtils = require('./canvasutils.js');
910
const { assignmentAutoUpdate } = require('./autoupdate.js');
1011
const { asyncWrap, unwrapSync, ipcSend } = require('./utils.js');
@@ -98,13 +99,14 @@ function awaitOpen(database) {
9899
});
99100
}
100101

101-
const startBot = module.exports = async function(botToken, canvasToken, config) {
102+
const startBot = module.exports = async function(botToken, canvasToken, samToken, config) {
102103
const db = new sqlite3.Database(DB_NAME, sqlite3.OPEN_READWRITE);
103104
await awaitOpen(db);
104105
db.on('error', console.error);
105106
config = enforceType(Config, config);
106107
const canvas = new Canvas(canvasToken, config);
107-
const canvasUtils = new CanvasUtils(canvas, config.overrides);
108+
const sam = new Sam(samToken, config);
109+
const canvasUtils = new CanvasUtils(canvas, sam, config.overrides);
108110
Object.defineProperties(client, {
109111
config: {
110112
configurable: false,
@@ -194,10 +196,11 @@ function shutdown() {
194196
if (require.main === module) {
195197
const DISCORD_TOKEN = process.env.DISCORD_TOKEN || process.env.BOT_TOKEN || '';
196198
const CANVAS_TOKEN = process.env.CANVAS_TOKEN || '';
199+
const SAM_TOKEN = process.env.SAM_TOKEN || '';
197200
const CONFIG = require('../.config.json');
198201

199202
hasShutdown = false;
200-
startBot(DISCORD_TOKEN, CANVAS_TOKEN, CONFIG).then(null, function() {
203+
startBot(DISCORD_TOKEN, CANVAS_TOKEN, SAM_TOKEN, CONFIG).then(null, function() {
201204
console.error.apply(this, arguments);
202205
process.exit(1);
203206
});

src/sam.js

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
'use strict';
2+
3+
const fetch = require("node-fetch");
4+
const HTMLParser = require('node-html-parser');
5+
6+
const { USER_AGENT, OPTS_HTML_PARSE } = require('./constants.js');
7+
8+
const URL = 'https://sam.csc.liv.ac.uk/COMP/CW_List.pl';
9+
const AUTH_COOKIE = '/var/www/SAM/CJar/InterViewMgr_COMP';
10+
11+
// Matches the non-breaking-space entity with or without its terminating
// semicolon, so removing it never leaves a stray ';' behind.
const RE_NBSP = /&nbsp;?/g;
12+
13+
/**
 * req
 * Perform an HTTP request, authenticating with the SAM session cookie.
 * @param {String} url - Address to request
 * @param {Object} opts
 * @param {String} opts.sessionCookie - Value sent in the auth cookie (URI-encoded here)
 * @param {String} [opts.method] - HTTP method passed through to fetch
 * @param {String} [opts.body] - Request body passed through to fetch
 * @param {String} [opts.mimeType] - Content-Type of the body; the header is omitted when absent
 * @returns {Promise} resolves with whatever fetch resolves with
 */
const req = async function(url, {sessionCookie, method, body, mimeType }) {
    const headers = {
        'User-Agent': USER_AGENT,
        'Cookie': `${encodeURIComponent(AUTH_COOKIE)}=${encodeURIComponent(sessionCookie)}`
    };
    // A header whose value is undefined gets stringified to "undefined" when
    // the headers are built, so only set Content-Type when one was supplied.
    if (mimeType !== undefined && mimeType !== null) headers['Content-Type'] = mimeType;
    return await fetch(url, { method, body, headers });
}
22+
23+
/**
 * get
 * Issue a GET request through req.
 * @param {String} url - Address to request
 * @param {String} sessionCookie - Forwarded to req as its sessionCookie option
 * @returns {Promise} the pending request returned by req
 */
const get = (url, sessionCookie) => req(url, { method: 'GET', sessionCookie });
26+
27+
/**
 * post
 * Issue a form-encoded POST request through req.
 * @param {String} url - Address to request
 * @param {String} sessionCookie - Forwarded to req as its sessionCookie option
 * @param {String} body - Already-encoded form body
 * @returns {Promise} the pending request returned by req
 */
const post = (url, sessionCookie, body) => {
    const mimeType = 'application/x-www-form-urlencoded';
    return req(url, { sessionCookie, method: 'POST', body, mimeType });
};
30+
31+
32+
/**
 * Sam
 * Holds the token and options that the SAM request methods read from `this`.
 */
class Sam {
    /**
     * @param {String} token - Must be a non-empty string
     * @param {Object} [options] - Configuration; replaced by {} when falsy
     * @throws {Error} when token is not a non-empty string
     */
    constructor(token, options) {
        const isUsableToken = typeof token === 'string' && token.length !== 0;
        if (!isUsableToken) {
            throw new Error('Invalid API token');
        }
        this.token = token;
        this.options = options ? options : {};
    }
}
39+
40+
/**
 * getCourses
 * Fetch the SAM page and parse the list of available courses out of the
 * `qryModule` drop-down. Authenticates with the token stored on `this`.
 * @returns {Promise.<Array.<String>>} list of module names
 * @throws {Error} if the request is not successful or the drop-down is missing
 */
const getCourses = async function() {
    const res = await get(URL, this.token);
    if (!res.ok) throw new Error(`SAM request failed with status ${res.status}`);
    const text = await res.text();

    const html = HTMLParser.parse(text, OPTS_HTML_PARSE);

    const moduleSelect = html.querySelector('select[NAME="qryModule"]');
    if (moduleSelect === null) throw new Error('Could not find modules');

    return moduleSelect
        .querySelectorAll('OPTION')
        .map(el => el.getAttribute('VALUE'))
        // An OPTION without a VALUE attribute yields undefined; drop those so
        // the result really is a list of strings.
        .filter(value => typeof value === 'string');
}
60+
Sam.prototype.getCourses = getCourses;
61+
62+
63+
/**
 * getFilteredCourses
 * Fetch the list of available courses and keep only those that match the
 * `sam_course_filter` regular expression from the options.
 * @returns {Promise.<Array.<String>>} course names matching the filter
 */
const getFilteredCourses = async function() {
    const courses = await this.getCourses();
    // Deliberately no 'g' flag: a global regex keeps lastIndex state between
    // test() calls, which would make results depend on the previous course.
    const regex = new RegExp(this.options.sam_course_filter);
    // Non-string entries cannot match and must not crash the whole filter.
    return courses.filter(course => typeof course === 'string' && regex.test(course));
}
72+
Sam.prototype.getFilteredCourses = getFilteredCourses;
73+
74+
75+
/**
 * getCourseAssignments
 * Fetch and parse the list of assignments for the given module.
 * Authenticates with the token stored on `this`.
 * @param {String} course - Name of module
 * @returns {Promise.<Array.<Assignment>>} assignments for given module
 * @throws {Error} if the request is not successful
 */
const getCourseAssignments = async function(course) {
    const res = await post(URL, this.token, `qryModule=${encodeURIComponent(course)}`);
    if (!res.ok) throw new Error(`SAM request failed with status ${res.status}`);
    const text = await res.text();

    const html = HTMLParser.parse(text, OPTS_HTML_PARSE);

    // Only rows with exactly four cells are treated as assignments.
    const assignmentData = html.querySelectorAll('TABLE.general TR')
        .map(row => row.querySelectorAll('TD'))
        .filter(cells => cells.length === 4);

    return assignmentData.map(cells => {
        // Named courseName so the requested `course` parameter is not shadowed.
        const [courseName] = cells[0].structuredText.split('\n');
        // A cell with a single line has no name; default to '' instead of
        // throwing and rejecting the whole fetch.
        const [id, name = ''] = cells[1].structuredText.split('\n');
        const due = cells[2].structuredText;
        const link = cells[3].querySelector('A');
        const url = link === null ? undefined : link.getAttribute('href');
        // NOTE(review): relies on Date being able to parse SAM's date text;
        // an unparseable value produces NaN for `due` - confirm the format.
        const dueDate = new Date(due);
        return {
            id: `SAM-${courseName}-${id}`,
            name: name.replace(RE_NBSP, '').trim(),
            course: courseName,
            due: dueDate.getTime(),
            dueDate,
            points: 1,
            url
        };
    });
}
102+
Sam.prototype.getCourseAssignments = getCourseAssignments;
103+
104+
module.exports = Sam;

src/types.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,7 @@ const ConfigAutomatedAssignment = Object.freeze({
139139
const Config = Object.freeze({
140140
api: TypeOrDefault(String, 'https://instructure.com/api/v1'),
141141
course_filter: TypeOrDefault(String, '(.*)'),
142+
sam_course_filter: TypeOrDefault(String, '.'),
142143
question_directory: TypeOrDefault(String, "questions/"),
143144
answer_directory: TypeOrDefault(String, "answers/"),
144145
prefix: TypeOrDefault(String, 'ca!'),

0 commit comments

Comments
 (0)