NLP – Practically

Machine Learning October 1, 2024 Natural Language Processing Comments Off 1,436 Views

NLTK

In [1]:

import webbrowser
webbrowser.open('https://www.youtube.com/watch?v=3Dg006VuOMk&list=RD3Dg006VuOMk&start_radio=1')

Out[1]:

True

In [2]:

!pip3 install nltk

Collecting nltk
  Using cached nltk-3.9.1-py3-none-any.whl.metadata (2.9 kB)
Requirement already satisfied: click in c:\users\mehak\appdata\local\programs\python\python312\lib\site-packages (from nltk) (8.1.7)
Requirement already satisfied: joblib in c:\users\mehak\appdata\local\programs\python\python312\lib\site-packages (from nltk) (1.4.2)
Requirement already satisfied: regex>=2021.8.3 in c:\users\mehak\appdata\local\programs\python\python312\lib\site-packages (from nltk) (2024.7.24)
Requirement already satisfied: tqdm in c:\users\mehak\appdata\local\programs\python\python312\lib\site-packages (from nltk) (4.66.5)
Requirement already satisfied: colorama in c:\users\mehak\appdata\local\programs\python\python312\lib\site-packages (from click->nltk) (0.4.6)
Using cached nltk-3.9.1-py3-none-any.whl (1.5 MB)
Installing collected packages: nltk
Successfully installed nltk-3.9.1

In [3]:

import nltk
nltk.download()

showing info https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/index.xml

Out[3]:

True

In [9]:

from nltk.tokenize import word_tokenize, sent_tokenize
sample_sentence="Hi I am Learning NLP with Itronix Solutions"
a=word_tokenize(sample_sentence)
print(a)

['Hi', 'I', 'am', 'Learning', 'NLP', 'with', 'Itronix', 'Solutions']

In [10]:

print(sample_sentence.split())

['Hi', 'I', 'am', 'Learning', 'NLP', 'with', 'Itronix', 'Solutions']

In [12]:

sample_sentence2="Hi I am Learning NLP with Mr. Karan Arora. My age is 27"
b=sent_tokenize(sample_sentence2)
print(b)

['Hi I am Learning NLP with Mr. Karan Arora.', 'My age is 27']

In [13]:

EXAMPLE_TEXT = "Hello Mr. Karan Arora, how are you doing today? The weather is great, and Python is awesome. The sky is blue. You shouldn't eat chicken."

print(sent_tokenize(EXAMPLE_TEXT))

['Hello Mr. Karan Arora, how are you doing today?', 'The weather is great, and Python is awesome.', 'The sky is blue.', "You shouldn't eat chicken."]

In [14]:

print(word_tokenize(EXAMPLE_TEXT))

['Hello', 'Mr.', 'Karan', 'Arora', ',', 'how', 'are', 'you', 'doing', 'today', '?', 'The', 'weather', 'is', 'great', ',', 'and', 'Python', 'is', 'awesome', '.', 'The', 'sky', 'is', 'blue', '.', 'You', 'should', "n't", 'eat', 'chicken', '.']

In [15]:

from nltk.tokenize import word_tokenize
from nltk.tokenize import TreebankWordTokenizer
from nltk.tokenize import WordPunctTokenizer

In [16]:

tok2=TreebankWordTokenizer() #use short form
tok3=WordPunctTokenizer()
sent="Hi my name is Karan"
print(word_tokenize(sent))
print(tok2.tokenize(sent)) #or TreebankWordTokenizer().tokenize(sent)
print(tok3.tokenize(sent))

['Hi', 'my', 'name', 'is', 'Karan']
['Hi', 'my', 'name', 'is', 'Karan']
['Hi', 'my', 'name', 'is', 'Karan']

In [17]:

sent2="I won't let you bring wine"
print(word_tokenize(sent2))
print(tok2.tokenize(sent2))
print(tok3.tokenize(sent2))

['I', 'wo', "n't", 'let', 'you', 'bring', 'wine']
['I', 'wo', "n't", 'let', 'you', 'bring', 'wine']
['I', 'won', "'", 't', 'let', 'you', 'bring', 'wine']

In [19]:

from nltk.tokenize import word_tokenize, sent_tokenize
sample_sentence="Hi I am Learning NLP with Itronix Solutions"
a=word_tokenize(sample_sentence)
print(a)

['Hi', 'I', 'am', 'Learning', 'NLP', 'with', 'Itronix', 'Solutions']

In [20]:

from nltk.corpus import stopwords
stop_words=set(stopwords.words("english"))
stop_words

Out[20]:

{'a',
 'about',
 'above',
 'after',
 'again',
 'against',
 'ain',
 'all',
 'am',
 'an',
 'and',
 'any',
 'are',
 'aren',
 "aren't",
 'as',
 'at',
 'be',
 'because',
 'been',
 'before',
 'being',
 'below',
 'between',
 'both',
 'but',
 'by',
 'can',
 'couldn',
 "couldn't",
 'd',
 'did',
 'didn',
 "didn't",
 'do',
 'does',
 'doesn',
 "doesn't",
 'doing',
 'don',
 "don't",
 'down',
 'during',
 'each',
 'few',
 'for',
 'from',
 'further',
 'had',
 'hadn',
 "hadn't",
 'has',
 'hasn',
 "hasn't",
 'have',
 'haven',
 "haven't",
 'having',
 'he',
 'her',
 'here',
 'hers',
 'herself',
 'him',
 'himself',
 'his',
 'how',
 'i',
 'if',
 'in',
 'into',
 'is',
 'isn',
 "isn't",
 'it',
 "it's",
 'its',
 'itself',
 'just',
 'll',
 'm',
 'ma',
 'me',
 'mightn',
 "mightn't",
 'more',
 'most',
 'mustn',
 "mustn't",
 'my',
 'myself',
 'needn',
 "needn't",
 'no',
 'nor',
 'not',
 'now',
 'o',
 'of',
 'off',
 'on',
 'once',
 'only',
 'or',
 'other',
 'our',
 'ours',
 'ourselves',
 'out',
 'over',
 'own',
 're',
 's',
 'same',
 'shan',
 "shan't",
 'she',
 "she's",
 'should',
 "should've",
 'shouldn',
 "shouldn't",
 'so',
 'some',
 'such',
 't',
 'than',
 'that',
 "that'll",
 'the',
 'their',
 'theirs',
 'them',
 'themselves',
 'then',
 'there',
 'these',
 'they',
 'this',
 'those',
 'through',
 'to',
 'too',
 'under',
 'until',
 'up',
 've',
 'very',
 'was',
 'wasn',
 "wasn't",
 'we',
 'were',
 'weren',
 "weren't",
 'what',
 'when',
 'where',
 'which',
 'while',
 'who',
 'whom',
 'why',
 'will',
 'with',
 'won',
 "won't",
 'wouldn',
 "wouldn't",
 'y',
 'you',
 "you'd",
 "you'll",
 "you're",
 "you've",
 'your',
 'yours',
 'yourself',
 'yourselves'}

In [22]:

for word in a:
    if word not in stop_words:
        print(word,end=" ")

Hi I Learning NLP Itronix Solutions

In [26]:

from nltk.stem import PorterStemmer
from nltk.tokenize import word_tokenize
port=PorterStemmer()
words = ["program", "programs", "programer", "programing", "programers",'beautiful'] 
for w in words:
    print(port.stem(w))

program
program
program
program
program
beauti

In [27]:

from nltk.stem import LancasterStemmer
lstemmer=LancasterStemmer()
lstemmer.stem('beautiful')

Out[27]:

'beauty'

In [28]:

from nltk.stem import WordNetLemmatizer
lzr = WordNetLemmatizer()
print(lzr.lemmatize('working',pos='v')) #pos : part of speech
print("better :", lzr.lemmatize("better", pos ="a"))

work
better : good

In [ ]:

Machine Learning Tutorials, Courses and Certifications

NLP – Practically

Related Articles

Related

About Machine Learning

Check Also

Stop Words

Multiple Linear Regression:

Microsoft AI Classroom Series Assessment Answers

Polynomial Regression

Support Vector Regression

Decision Tree Regression

OpenCV Python Project for Vehicle Detection From an Image

Data Cleaning Melbourne House Dataset

ChatGPT Certification

Scatter Plot using Matplotlib

Indexing-Selection-Filtering in – Pandas Data Science Tutorials

OpenCV Python Project for Bus Detection from an Image

OpenCV Python Project for Vehicle Detection From an Image

OpenCV Python Project for Vehicle Detection in a Video frame

Airline Quality Service

Airport Quality Service