Skip to main content

Getting Started

🧰 Learn about individual Toolkits​

This quick-start guide shows how to install and use the Bangla Toolkit (BNTK).

Installation​

Install the required packages for Bengali natural language processing:

npm install @bntk/tokenization @bntk/stemming @bntk/pos @bntk/ner @bntk/transliteration

Example Usage​

import * as ner from "@bntk/ner";
import * as pos from "@bntk/pos";
import * as stemming from "@bntk/stemming";
import * as tokenization from "@bntk/tokenization";
import * as transliteration from "@bntk/transliteration";

// Sample Bengali input: two sentences, each ending with a danda (।).
// Keep this file encoded as UTF-8; any other encoding corrupts the text below.
const contents = `আমি বাংলা লেখার জন্য টুলিটক ব্যবহার করছি। আমার বন্ধু রহিম ঢাকায় থাকেন।`;

// ==== Tokenization ====
// Split the raw text into sentences.
const sentences = tokenization.tokenizeSentences(contents);
console.log(sentences);
// Sample output:
// ['আমি বাংলা লেখার জন্য টুলিটক ব্যবহার করছি', 'আমার বন্ধু রহিম ঢাকায় থাকেন']

// Split the first sentence into words.
const words = tokenization.tokenizeWords(sentences[0]);
console.log(words);
// Sample output:
// ['আমি', 'বাংলা', 'লেখার', 'জন্য', 'টুলিটক', 'ব্যবহার', 'করছি']

// ==== Stemming ====
// Stem every word from the tokenization step.
const stemmedWords = stemming.stemWords(words);
console.log(stemmedWords);
// Sample output (identical to the input words for this sentence):
// ['আমি', 'বাংলা', 'লেখার', 'জন্য', 'টুলিটক', 'ব্যবহার', 'করছি']

// ==== POS ====
// Tag the stemmed words; each result is shown as `word/TAG`.
const taggedWords = pos.tagWords(stemmedWords);
console.log(taggedWords);
// Sample output:
// ['আমি/PRON', 'বাংলা/NOUN', 'লেখার/NOUN', 'জন্য/ADP', 'টুলিটক/NOUN', 'ব্যবহার/VERB', 'করছি/VERB']

// ==== NER ====
// Extract entities from the second sentence (a raw string, not a word list).
const entities = ner.extractEntities(sentences[1]);
console.log(entities);
// Sample output:
// [{type: 'PRONOUN', value: 'আমার', start: 0, end: 4}, {...}]

// ==== Transliteration ====
// Convert Latin-script (romanized) input into Bengali script.
const transliterated = transliteration.transliterate("amar name apon.");
console.log(transliterated);
// Sample output:
// 'আমার নাম আপন।'

📚 See API Reference​