From cb8e8f64cc8a221c384ecdbdbd5ff00925b513f3 Mon Sep 17 00:00:00 2001 From: Felipe Cornejo Date: Thu, 15 Feb 2024 18:28:00 -0300 Subject: [PATCH] chore: add prettier config & cleanup comments --- .prettierignore | 1 + .prettierrc.json | 5 + src/index.ts | 73 +++++---- src/scrapeProfileData.ts | 328 ++++++++++++++++++++++++--------------- 4 files changed, 246 insertions(+), 161 deletions(-) create mode 100644 .prettierignore create mode 100644 .prettierrc.json diff --git a/.prettierignore b/.prettierignore new file mode 100644 index 0000000..3c3629e --- /dev/null +++ b/.prettierignore @@ -0,0 +1 @@ +node_modules diff --git a/.prettierrc.json b/.prettierrc.json new file mode 100644 index 0000000..8d93b46 --- /dev/null +++ b/.prettierrc.json @@ -0,0 +1,5 @@ +{ + "semi": true, + "singleQuote": true, + "jsxSingleQuote": true +} diff --git a/src/index.ts b/src/index.ts index 8a9abef..cd526fc 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,59 +1,56 @@ import dotenv from 'dotenv'; dotenv.config(); -import pkg, { Interaction, InteractionCollector, InteractionCollectorOptions } from 'discord.js'; +import pkg, { Interaction } from 'discord.js'; import { processPostRequest } from './scrapeProfileData'; const { Client, IntentsBitField, AttachmentBuilder } = pkg; const client = new Client({ - intents: [ - IntentsBitField.Flags.Guilds, - IntentsBitField.Flags.GuildMembers, - IntentsBitField.Flags.GuildMessages, - IntentsBitField.Flags.MessageContent, - ] -}) + intents: [ + IntentsBitField.Flags.Guilds, + IntentsBitField.Flags.GuildMembers, + IntentsBitField.Flags.GuildMessages, + IntentsBitField.Flags.MessageContent, + ], +}); client.on('ready', (c) => { - console.log(`✅ ${c.user.tag} is online `) -}) - -// client.on('messageCreate', (message) => { -// console.log(message.content) -// if (message.author.bot) return -// if (message.content === 'Hola' || message.content === 'hola') { -// message.reply('Hola!!') -// } -// }) + console.log(`✅ ${c.user.tag} is online `); +}); client.on('interactionCreate', async (interaction: Interaction) => { - if (!interaction.isChatInputCommand()) return; + if (!interaction.isChatInputCommand()) return; - if (interaction.commandName === 'ping') { - await interaction.reply('Pong!'); - } + if (interaction.commandName === 'ping') { + await interaction.reply('Pong!'); + } - if (interaction.commandName === 'perfil') { - const url = interaction.options.get('url')?.value as string; + if (interaction.commandName === 'perfil') { + const url = interaction.options.get('url')?.value as string; - await interaction.deferReply(); + await interaction.deferReply(); - console.log("url enviada por el usuario: ", url) + console.log('url enviada por el usuario: ', url); - const res = await processPostRequest(url); + const res = await processPostRequest(url); - console.log("datos obtenidos: ", res); + console.log('datos obtenidos: ', res); - const resString = JSON.stringify(res, null, 2); - if (resString.length <= 2000) { - await interaction.editReply(resString); - } else { - const buffer = Buffer.from(resString, 'utf-8'); - const attachment = new AttachmentBuilder(buffer, { name: 'profile.json' }); - await interaction.editReply({ content: 'The response is too large to display here. Please see the file.', files: [attachment] }); - } + const resString = JSON.stringify(res, null, 2); + if (resString.length <= 2000) { + await interaction.editReply(resString); + } else { + const buffer = Buffer.from(resString, 'utf-8'); + const attachment = new AttachmentBuilder(buffer, { + name: 'profile.json', + }); + await interaction.editReply({ + content: + 'The response is too large to display here. Please see the file.', + files: [attachment], + }); } - + } }); +client.login(process.env.TOKEN); -client.login(process.env.TOKEN) \ No newline at end of file diff --git a/src/scrapeProfileData.ts b/src/scrapeProfileData.ts index 347c861..86dd77e 100644 --- a/src/scrapeProfileData.ts +++ b/src/scrapeProfileData.ts @@ -1,185 +1,270 @@ import dotenv from 'dotenv'; dotenv.config(); // @ts-ignore -import Linkout from "linkout-scraper"; //TODO: There are no types for this lib +import Linkout from 'linkout-scraper'; //TODO: There are no types for this lib import puppeteer, { Page } from 'puppeteer'; async function scrapeProfileData(page: Page) { - const profile: Record | null)[]> = { + const profile: Record< + string, + string | (Record | null)[] + > = { url: page.url(), - } + }; try { await page.waitForSelector( - ".pv-text-details__about-this-profile-entrypoint" + '.pv-text-details__about-this-profile-entrypoint', ); const fullName = await page.evaluate(() => { const titleElement = document.querySelector( - ".pv-text-details__about-this-profile-entrypoint" + '.pv-text-details__about-this-profile-entrypoint', ); - const h1Element = titleElement?.querySelector("h1"); - - if(!h1Element) { - return null + const h1Element = titleElement?.querySelector('h1'); + + if (!h1Element) { + return null; } return h1Element.textContent?.trim(); }); - if(fullName){ + if (fullName) { profile.fullName = fullName; - - const nameParts = fullName.split(" "); + + const nameParts = fullName.split(' '); const firstName = nameParts[0]; profile.firstName = firstName; - if(nameParts.length > 1){ - const lastName = nameParts.slice(1).join(" "); + if (nameParts.length > 1) { + const lastName = nameParts.slice(1).join(' '); profile.lastName = lastName; } } const summary = await page.evaluate(() => { - const elements = Array.from(document.querySelectorAll('h2>span[aria-hidden="true"]')); - const element = elements.find(span => { - return span.textContent?.includes("Acerca de"); + const elements = Array.from( + document.querySelectorAll('h2>span[aria-hidden="true"]'), + ); + const element = elements.find((span) => { + return span.textContent?.includes('Acerca de'); }); - - if(element){ - const parentDiv = element.parentElement?.parentElement?.parentElement?.parentElement?.parentElement; - const span = parentDiv?.nextElementSibling?.firstElementChild?.firstElementChild?.firstElementChild?.firstElementChild; + + if (element) { + const parentDiv = + element.parentElement?.parentElement?.parentElement?.parentElement + ?.parentElement; + const span = + parentDiv?.nextElementSibling?.firstElementChild?.firstElementChild + ?.firstElementChild?.firstElementChild; if (!span) { return null; } - const summary = span.textContent?.trim().replace(/\n\s*/g, '') + const summary = span.textContent?.trim().replace(/\n\s*/g, ''); return summary; } else { return null; } }); - if(summary) { + if (summary) { profile.summary = summary; } - + const experiencia = await page.evaluate(() => { - const elements = Array.from(document.querySelectorAll('h2>span[aria-hidden="true"]')); - const element = elements.find(span => { - return span.textContent?.includes("Experiencia"); + const elements = Array.from( + document.querySelectorAll('h2>span[aria-hidden="true"]'), + ); + const element = elements.find((span) => { + return span.textContent?.includes('Experiencia'); }); - - if(element){ - const parentDiv = element.parentElement?.parentElement?.parentElement?.parentElement?.parentElement; - const ul = parentDiv?.nextElementSibling?.firstElementChild;//UL - if (!ul || ul.tagName !== "UL") { + + if (element) { + const parentDiv = + element.parentElement?.parentElement?.parentElement?.parentElement + ?.parentElement; + const ul = parentDiv?.nextElementSibling?.firstElementChild; //UL + if (!ul || ul.tagName !== 'UL') { return null; } - + const liElements = ul.querySelectorAll('li'); - return Array.from(liElements).map(li => { - const divXpath = './div/div[2]/div[1]/div'; - const divJobDescriptionXpath = './div/div[2]/div[2]/ul/li[1]/div/ul/li/div/div/div/div/span[2]'; - var divResult = document.evaluate(divXpath, li, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null); - var divElement = divResult.singleNodeValue; - var divjdResult = document.evaluate(divJobDescriptionXpath, li, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null); - var divjdElement = divjdResult.singleNodeValue; - - if (!divElement) { - return null; - } - - // Assuming the divElement contains structured data like job title, company, and dates - const jobTitleXpath = './/div/div/div/div/span[1]'; // Replace with actual Xpath for job title - const companyXpath = './/span[1]/span[1]'; // Replace with actual Xpath for company name - const datesXpath = './/span[2]/span[1]'; // Replace with actual Xpath for dates - const locationXpath = './/span[3]/span[1]'; // Replace with actual Xpath for dates - - // Extract text content for each piece of data - const jobTitle = document.evaluate(jobTitleXpath, divElement, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue?.textContent?.trim(); - const company = document.evaluate(companyXpath, divElement, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue?.textContent?.trim(); - const dates = document.evaluate(datesXpath, divElement, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue?.textContent?.trim(); - const location = document.evaluate(locationXpath, divElement, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue?.textContent?.trim(); - const jobDescription = divjdElement?.textContent?.trim() - - // Construct an object with the extracted data - const experienceObject: Record = {} - - if(jobTitle) experienceObject.jobTitle = jobTitle; - if(company) experienceObject.company = company; - if(dates) experienceObject.dates = dates; - if(location) experienceObject.location = location; - if(divjdElement) experienceObject.jobDescription = jobDescription; - - return experienceObject; - }).filter(exp => exp != null); + return Array.from(liElements) + .map((li) => { + const divXpath = './div/div[2]/div[1]/div'; + const divJobDescriptionXpath = + './div/div[2]/div[2]/ul/li[1]/div/ul/li/div/div/div/div/span[2]'; + var divResult = document.evaluate( + divXpath, + li, + null, + XPathResult.FIRST_ORDERED_NODE_TYPE, + null, + ); + var divElement = divResult.singleNodeValue; + var divjdResult = document.evaluate( + divJobDescriptionXpath, + li, + null, + XPathResult.FIRST_ORDERED_NODE_TYPE, + null, + ); + var divjdElement = divjdResult.singleNodeValue; + + if (!divElement) { + return null; + } + + // Assuming the divElement contains structured data like job title, company, and dates + const jobTitleXpath = './/div/div/div/div/span[1]'; // Replace with actual Xpath for job title + const companyXpath = './/span[1]/span[1]'; // Replace with actual Xpath for company name + const datesXpath = './/span[2]/span[1]'; // Replace with actual Xpath for dates + const locationXpath = './/span[3]/span[1]'; // Replace with actual Xpath for dates + + // Extract text content for each piece of data + const jobTitle = document + .evaluate( + jobTitleXpath, + divElement, + null, + XPathResult.FIRST_ORDERED_NODE_TYPE, + null, + ) + .singleNodeValue?.textContent?.trim(); + const company = document + .evaluate( + companyXpath, + divElement, + null, + XPathResult.FIRST_ORDERED_NODE_TYPE, + null, + ) + .singleNodeValue?.textContent?.trim(); + const dates = document + .evaluate( + datesXpath, + divElement, + null, + XPathResult.FIRST_ORDERED_NODE_TYPE, + null, + ) + .singleNodeValue?.textContent?.trim(); + const location = document + .evaluate( + locationXpath, + divElement, + null, + XPathResult.FIRST_ORDERED_NODE_TYPE, + null, + ) + .singleNodeValue?.textContent?.trim(); + const jobDescription = divjdElement?.textContent?.trim(); + + // Construct an object with the extracted data + const experienceObject: Record = {}; + + if (jobTitle) experienceObject.jobTitle = jobTitle; + if (company) experienceObject.company = company; + if (dates) experienceObject.dates = dates; + if (location) experienceObject.location = location; + if (divjdElement) experienceObject.jobDescription = jobDescription; + return experienceObject; + }) + .filter((exp) => exp != null); } else { return null; } }); - if(experiencia) { + if (experiencia) { profile.experience = experiencia; } - + const educacion = await page.evaluate(() => { - const elements = Array.from(document.querySelectorAll('h2>span[aria-hidden="true"]')); - const element = elements.find(span => { - return span?.textContent?.includes("Educación"); + const elements = Array.from( + document.querySelectorAll('h2>span[aria-hidden="true"]'), + ); + const element = elements.find((span) => { + return span?.textContent?.includes('Educación'); }); - - if(element){ - const parentDiv = element.parentElement?.parentElement?.parentElement?.parentElement?.parentElement; - const ul = parentDiv?.nextElementSibling?.firstElementChild;//UL - if (!ul || ul.tagName !== "UL") { + + if (element) { + const parentDiv = + element.parentElement?.parentElement?.parentElement?.parentElement + ?.parentElement; + const ul = parentDiv?.nextElementSibling?.firstElementChild; //UL + if (!ul || ul.tagName !== 'UL') { return null; } - + const liElements = ul.querySelectorAll('li'); - return Array.from(liElements).map(li => { - const edInstitution = './div/div[2]/div[1]/a/div/div/div/div/span[2]'; - var edInstitutionResult = document.evaluate(edInstitution, li, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null); - var edInstitutionElement = edInstitutionResult.singleNodeValue?.textContent?.trim(); - - const edCarreer = './div/div[2]/div[1]/a/span[1]/span[2]'; - var edCarreerResult = document.evaluate(edCarreer, li, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null); - var edCarreerElement = edCarreerResult.singleNodeValue?.textContent?.trim(); - - const edDate = './div/div[2]/div[1]/a/span[2]/span[2]'; - var edDateResult = document.evaluate(edDate, li, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null); - var edDateElement = edDateResult.singleNodeValue?.textContent?.trim(); - - - if (!edInstitution) { - return null; - } - - // Construct an object with the extracted data - const educationObject: Record = {} - - if(edInstitutionElement) educationObject.edInstitutionElement = edInstitutionElement; - if(edCarreerElement) educationObject.edCarreerElement = edCarreerElement; - if(edDateElement) educationObject.edDateElement = edDateElement; - - if (Object.keys(educationObject).length === 0) { - return null; - } - return educationObject; - }).filter(exp => exp != null); + return Array.from(liElements) + .map((li) => { + const edInstitution = + './div/div[2]/div[1]/a/div/div/div/div/span[2]'; + var edInstitutionResult = document.evaluate( + edInstitution, + li, + null, + XPathResult.FIRST_ORDERED_NODE_TYPE, + null, + ); + var edInstitutionElement = + edInstitutionResult.singleNodeValue?.textContent?.trim(); + + const edCarreer = './div/div[2]/div[1]/a/span[1]/span[2]'; + var edCarreerResult = document.evaluate( + edCarreer, + li, + null, + XPathResult.FIRST_ORDERED_NODE_TYPE, + null, + ); + var edCarreerElement = + edCarreerResult.singleNodeValue?.textContent?.trim(); + + const edDate = './div/div[2]/div[1]/a/span[2]/span[2]'; + var edDateResult = document.evaluate( + edDate, + li, + null, + XPathResult.FIRST_ORDERED_NODE_TYPE, + null, + ); + var edDateElement = + edDateResult.singleNodeValue?.textContent?.trim(); + + if (!edInstitution) { + return null; + } + + // Construct an object with the extracted data + const educationObject: Record = {}; + if (edInstitutionElement) + educationObject.edInstitutionElement = edInstitutionElement; + if (edCarreerElement) + educationObject.edCarreerElement = edCarreerElement; + if (edDateElement) educationObject.edDateElement = edDateElement; + + if (Object.keys(educationObject).length === 0) { + return null; + } + return educationObject; + }) + .filter((exp) => exp != null); } else { return null; } }); - if(educacion) { + if (educacion) { profile.education = educacion; } - - return profile; } catch (error) { - console.error("An error occurred:", error); + console.error('An error occurred:', error); return null; } } @@ -191,35 +276,32 @@ export async function processPostRequest(prompt: string) { }); const page = await browser.newPage(); const cdp = await page.target().createCDPSession(); - + await page.setViewport({ width: 1920, height: 1024, }); - + // add ghost-cursor for maximum safety await Linkout.tools.loadCursor(page, true); - + // Login with LinkedIn await Linkout.services.login(page, cdp, { cookie: process.env.COOKIE, }); - + // Visit a LinkedIn profile await Linkout.services.visit(page, cdp, { url: prompt, }); - const profileData = await scrapeProfileData(page); - await page.close(); await browser.close(); - - return (profileData); + return profileData; } catch (error) { - return(error); + return error; } }