Skip to content

Commit

Permalink
Initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
ReneR97 committed Feb 12, 2023
0 parents commit 37c2506
Show file tree
Hide file tree
Showing 6 changed files with 2,033 additions and 0 deletions.
2 changes: 2 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Auto detect text files and perform LF normalization
* text=auto
130 changes: 130 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
# Logs
logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*
lerna-debug.log*
.pnpm-debug.log*

# Diagnostic reports (https://nodejs.org/api/report.html)
report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json

# Runtime data
pids
*.pid
*.seed
*.pid.lock

# Directory for instrumented libs generated by jscoverage/JSCover
lib-cov

# Coverage directory used by tools like istanbul
coverage
*.lcov

# nyc test coverage
.nyc_output

# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
.grunt

# Bower dependency directory (https://bower.io/)
bower_components

# node-waf configuration
.lock-wscript

# Compiled binary addons (https://nodejs.org/api/addons.html)
build/Release

# Dependency directories
node_modules/
jspm_packages/

# Snowpack dependency directory (https://snowpack.dev/)
web_modules/

# TypeScript cache
*.tsbuildinfo

# Optional npm cache directory
.npm

# Optional eslint cache
.eslintcache

# Optional stylelint cache
.stylelintcache

# Microbundle cache
.rpt2_cache/
.rts2_cache_cjs/
.rts2_cache_es/
.rts2_cache_umd/

# Optional REPL history
.node_repl_history

# Output of 'npm pack'
*.tgz

# Yarn Integrity file
.yarn-integrity

# dotenv environment variable files
.env
.env.development.local
.env.test.local
.env.production.local
.env.local

# parcel-bundler cache (https://parceljs.org/)
.cache
.parcel-cache

# Next.js build output
.next
out

# Nuxt.js build / generate output
.nuxt
dist

# Gatsby files
.cache/
# Comment in the public line in if your project uses Gatsby and not Next.js
# https://nextjs.org/blog/next-9-1#public-directory-support
# public

# vuepress build output
.vuepress/dist

# vuepress v2.x temp and cache directory
.temp
.cache

# Docusaurus cache and generated files
.docusaurus

# Serverless directories
.serverless/

# FuseBox cache
.fusebox/

# DynamoDB Local files
.dynamodb/

# TernJS port file
.tern-port

# Stores VSCode versions used for testing VSCode extensions
.vscode-test

# yarn v2
.yarn/cache
.yarn/unplugged
.yarn/build-state.yml
.yarn/install-state.gz
.pnp.*
32 changes: 32 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
## Installation

Once you have downloaded the project, open the "index.js" file.

You will find the following variables:

```js
const course_url = '';
const subtitle_lang = 'en';
const cookies = [{ name: '_domestika_session', value: '', domain: 'www.domestika.org' }];
const _credentials_ = '';
```

The course_url is just the full URL of the course you want to download. For example:

https://www.domestika.org/en/courses/3086-creating-animated-stories-with-after-effects/course

IMPORTANT: you have to be on the course's "content" page. You know you are on the right page when the URL ends with "/course".

To get the values of the "\_domestika_session" and "\_credentials\_" cookies, you will need to install a Chrome extension called Cookie-Editor.

After you have installed the extension, log in to Domestika and open the extension.

In the popup window, look for "\_domestika_session", click to open it, and copy the contents of its Value field into the `value` field of the `cookies` entry in "index.js".

Then look for the "\_credentials\_" cookie and copy its value into the "\_credentials\_" variable.

If you want to change the subtitles that will be downloaded, just put the preferred language code into the "subtitle_lang" variable. Make sure the language is available for the course first.

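After you have done that, make sure "yt-dlp" is installed and available on your PATH (the script calls it to download the videos), then open a terminal and start the script with "npm run start".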

All the courses will be downloaded in a folder called "domestika_courses/{coursename}/".
133 changes: 133 additions & 0 deletions index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
const puppeteer = require('puppeteer');
const cheerio = require('cheerio');
const util = require('util');
const exec = util.promisify(require('child_process').exec);
const m3u8ToMp4 = require('m3u8-to-mp4');
const fs = require('fs');
const converter = new m3u8ToMp4();

// Full URL of the course content page to download; it is passed directly to
// page.goto() in scrapeSite().
const course_url = '';
// Subtitle language passed to yt-dlp's --sub-langs option.
const subtitle_lang = 'en';

// Cookies installed on every Puppeteer page before navigation so that the
// pages are loaded with the user's Domestika session.
const cookies = [
  {
    name: '_domestika_session',
    value: '',
    domain: 'www.domestika.org',
  },
];

// Raw value of the "_credentials_" cookie; scrapeSite() extracts the API
// access token from it.
const _credentials_ = '';

// Entry point. The promise is handled explicitly so a failure is reported
// and the process ends with a non-zero exit code instead of being left as an
// unhandled rejection.
scrapeSite().catch((err) => {
  console.error(err);
  process.exit(1);
});

/**
 * Downloads all videos (and their subtitles) of the course at `course_url`.
 *
 * Steps:
 *  1. Load the course page with the configured cookies and read the course
 *     title and the unit links from the HTML.
 *  2. For every unit link except the last one, collect the unit's videos via
 *     getInitialProps(). The last link is not scraped as a unit; its href is
 *     only used to extract the course id for the final-project API lookup.
 *  3. Resolve the final project video through the Domestika API, using the
 *     access token found in the `_credentials_` cookie value.
 *  4. Run yt-dlp twice per video (video download, then subtitles) into
 *     `domestika_courses/<course>/<unit>/`.
 *
 * @returns {Promise<void>}
 * @throws {Error} When no unit links are found, when the access token or the
 *   course id cannot be extracted, or when a yt-dlp/API call fails.
 */
async function scrapeSite() {
  // Replaces characters that are not safe in file or folder names with '-'.
  const toSafeName = (text) => String(text).trim().replace(/[/\\?%*:|"<>]/g, '-');

  // yt-dlp is started without a shell: URLs containing '&' and titles
  // containing quotes or '$' are passed through verbatim as single arguments.
  const execFile = util.promisify(require('child_process').execFile);

  // The browser is only needed to fetch the course page HTML; close it right
  // away, and always, so no Chromium process is left behind on failure.
  let $;
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    await page.setCookie(...cookies);
    await page.goto(course_url);
    $ = cheerio.load(await page.content());
  } finally {
    await browser.close();
  }

  const units = $('h4.h2.unit-item__title a');
  if (units.length === 0) {
    throw new Error('No course units found. Check course_url (it must end in "/course") and the _domestika_session cookie.');
  }

  const title = toSafeName($('h1.course-header-new__title').text());

  // Validate the credentials before the slow per-unit scraping starts.
  const tokenMatch = /accessToken\":\"(.*?)\"/.exec(decodeURI(_credentials_));
  if (tokenMatch === null) {
    throw new Error('Could not find an accessToken in _credentials_. Copy the full value of the "_credentials_" cookie.');
  }
  const access_token = tokenMatch[1];

  const lastUnitHref = $(units[units.length - 1]).attr('href');
  const courseMatch = /courses\/(.*?)-/.exec(lastUnitHref);
  if (courseMatch === null) {
    throw new Error(`Could not extract the course id from "${lastUnitHref}".`);
  }
  const courseId = courseMatch[1];

  const allVideos = [];
  // Starts at 1 to account for the final project video added below.
  let totalVideos = 1;

  for (let i = 0; i < units.length - 1; i++) {
    const unitLink = $(units[i]);
    const videoData = await getInitialProps(unitLink.attr('href'));
    allVideos.push({
      title: toSafeName(unitLink.text()),
      videoData: videoData,
    });
    totalVideos += videoData.length;
  }

  const finalProject = await fetchFromApi(`https://api.domestika.org/api/courses/${courseId}/final-project?with_server_timing=true`, 'finalProject.v1', access_token);
  const finalVideoId = finalProject.data.relationships.video.data.id;
  const finalVideo = await fetchFromApi(`https://api.domestika.org/api/videos/${finalVideoId}?with_server_timing=true`, 'video.v1', access_token);

  allVideos.push({
    title: 'Final project',
    videoData: [{ playbackURL: finalVideo.data.attributes.playbackUrl, title: 'Final project' }],
  });

  let count = 0;
  for (const unit of allVideos) {
    const unitDir = `domestika_courses/${title}/${unit.title}`;
    // With `recursive: true` this is a no-op when the folder already exists.
    fs.mkdirSync(`${unitDir}/`, { recursive: true });

    for (const vData of unit.videoData) {
      const videoName = toSafeName(vData.title);

      await execFile('yt-dlp', ['--allow-u', '-f', 'bv*[height<=1080]', vData.playbackURL, '-o', `${unitDir}/${videoName}.%(ext)s`]);
      await execFile('yt-dlp', ['--write-subs', '--sub-langs', subtitle_lang, '--skip-download', '--convert-subtitles', 'srt', vData.playbackURL, '-o', `${unitDir}/${videoName}`]);

      count++;
      console.log(`Download ${count}/${totalVideos} Downloaded`);
    }
  }

  console.log('All Videos Downloaded');
}

/**
 * Opens a unit page in its own headless browser (with the configured
 * cookies) and reads the unit's video list from `window.__INITIAL_PROPS__`.
 *
 * @param {string} url - URL of the unit page to open.
 * @returns {Promise<Array<{playbackURL: string, title: string}>>} One entry
 *   per video; an empty array when the page exposes no `__INITIAL_PROPS__`
 *   or no `videos` list.
 */
async function getInitialProps(url) {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    await page.setCookie(...cookies);
    await page.goto(url);

    const data = await page.evaluate(() => window.__INITIAL_PROPS__);
    if (!data || !Array.isArray(data.videos)) {
      return [];
    }

    return data.videos
      // Skip malformed entries instead of failing on `el.video.playbackURL`.
      .filter((el) => el && el.video)
      .map((el) => ({
        playbackURL: el.video.playbackURL,
        title: el.video.title,
      }));
  } finally {
    // Always release the browser, even when navigation or evaluation throws.
    await browser.close();
  }
}

/**
 * Performs an authenticated GET request against the Domestika JSON:API and
 * returns the parsed response body.
 *
 * @param {string} apiURL - Full URL of the API endpoint.
 * @param {string} accept_version - Value sent in the `x-dmstk-accept-version` header.
 * @param {string} access_token - Bearer token sent in the `authorization` header.
 * @returns {Promise<any>} The parsed JSON body of the response.
 * @throws {Error} When the server answers with a non-2xx status, so callers
 *   do not mistake an error payload for real data.
 */
async function fetchFromApi(apiURL, accept_version, access_token) {
  const response = await fetch(apiURL, {
    method: 'get',
    headers: {
      'Content-Type': 'application/vnd.api+json',
      Accept: 'application/vnd.api+json',
      'x-dmstk-accept-version': accept_version,
      authorization: `Bearer ${access_token}`,
    },
  });

  if (!response.ok) {
    throw new Error(`Request to ${apiURL} failed with status ${response.status} ${response.statusText}`);
  }

  return response.json();
}
Loading

0 comments on commit 37c2506

Please sign in to comment.