---
library_name: peft
base_model: facebook/bart-large
---

# Model Card for LexBartLo_1

## Model Details

### Model Description

- **Paper:** "A Hybrid Architecture with Efficient Fine Tuning for Abstractive Patent Document Summarization", available at https://arxiv.org/abs/2503.10354 or https://ieeexplore.ieee.org/document/11030964
- **Developed by:** Nevidu Jayatilleke and Ruvan Weerasinghe
- **Supported Language:** English
- **Finetuned Domain:** Textile patent documents from the BigPatent dataset
- **Finetuned from model:** facebook/bart-large
- **Link to the Generalised Model:** https://huggingface.co/Nevidu/LexBartLo_2

## How to use the model

The model follows a hybrid architecture: an extractive LexRank stage first reorders the input sentences by salience, and the reordered text is then passed to the LoRA fine-tuned BART model for abstractive summarization.

```python
from peft import PeftModel, PeftConfig
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
import torch
import nltk
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import stopwords
from nltk.cluster.util import cosine_distance
import numpy as np
import networkx as nx

# NLTK resources required for sentence tokenization and stopword removal
nltk.download('punkt')
nltk.download('stopwords')

def preprocess_text(text):
    # Split the document into sentences, then lowercase and tokenize each one
    sentences = sent_tokenize(text)
    tokenized_sentences = [word_tokenize(sentence.lower()) for sentence in sentences]
    return tokenized_sentences

def sentence_similarity(sentence1, sentence2):
    # Cosine similarity between bag-of-words vectors, ignoring stopwords
    stop_words = set(stopwords.words('english'))
    filtered_sentence1 = [w for w in sentence1 if w not in stop_words]
    filtered_sentence2 = [w for w in sentence2 if w not in stop_words]

    all_words = list(set(filtered_sentence1 + filtered_sentence2))
    vector1 = [filtered_sentence1.count(word) for word in all_words]
    vector2 = [filtered_sentence2.count(word) for word in all_words]

    # Guard against zero vectors (e.g. sentences made up entirely of stopwords),
    # which would otherwise cause a division by zero in cosine_distance
    if not any(vector1) or not any(vector2):
        return 0.0
    return 1 - cosine_distance(vector1, vector2)

def build_similarity_matrix(sentences):
    similarity_matrix = np.zeros((len(sentences), len(sentences)))
    for i in range(len(sentences)):
        for j in range(len(sentences)):
            if i != j:
                similarity_matrix[i][j] = sentence_similarity(sentences[i], sentences[j])
    return similarity_matrix

def apply_lexrank(similarity_matrix, damping=0.85, threshold=0.2, max_iter=100):
    # Run PageRank over the sentence-similarity graph; `threshold` is passed
    # as the convergence tolerance of the power iteration
    nx_graph = nx.from_numpy_array(similarity_matrix)
    scores = nx.pagerank(nx_graph, alpha=damping, tol=threshold, max_iter=max_iter)
    return scores

def get_top_sentences(sentences, scores):
    # Order sentences by their LexRank score, highest first
    ranked_sentences = sorted(((scores[i], sentence) for i, sentence in enumerate(sentences)), reverse=True)
    top_sentences = [sentence for score, sentence in ranked_sentences]
    return top_sentences

def extract_important_sentences(text):
    preprocessed_sentences = preprocess_text(text)
    similarity_matrix = build_similarity_matrix(preprocessed_sentences)
    scores = apply_lexrank(similarity_matrix)
    top_sentences = get_top_sentences(preprocessed_sentences, scores)
    paragraph = ' '.join([' '.join(sentence) for sentence in top_sentences])
    return paragraph

def summarize(text, max_tokens):
    peft_model = "Nevidu/LexBartLo_1"
    config = PeftConfig.from_pretrained(peft_model)

    # Load the base model and tokenizer
    model = AutoModelForSeq2SeqLM.from_pretrained(config.base_model_name_or_path)
    tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)

    # Load the LoRA adapter on top of the base model
    model = PeftModel.from_pretrained(model, peft_model)

    # Extractive stage: reorder sentences by LexRank salience
    sorted_text = extract_important_sentences(text)
    input_ids = tokenizer(sorted_text, return_tensors="pt", truncation=True).input_ids

    # Abstractive stage: generate the summary
    with torch.inference_mode():
        outputs = model.generate(input_ids=input_ids, max_new_tokens=max_tokens, do_sample=True, top_p=0.9)
    summary = tokenizer.batch_decode(outputs.detach().cpu().numpy(), skip_special_tokens=True)[0]
    return summary

text = """ Add your textile patent text"""
max_tokens = 256

summary = summarize(text, max_tokens)
```
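Note that `summarize` above reloads the base model and adapter on every call. For summarizing many documents it is cheaper to load everything once and reuse it. Below is a minimal sketch of that pattern; the helper names `load_summarizer` and `summarize_with` are illustrative, not part of this repository, and the sketch reuses `extract_important_sentences` from the block above.

```python
import torch
from peft import PeftModel, PeftConfig
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

def load_summarizer(peft_model="Nevidu/LexBartLo_1"):
    # Load the base model, tokenizer, and LoRA adapter a single time
    config = PeftConfig.from_pretrained(peft_model)
    model = AutoModelForSeq2SeqLM.from_pretrained(config.base_model_name_or_path)
    tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
    model = PeftModel.from_pretrained(model, peft_model)
    model.eval()
    return model, tokenizer

def summarize_with(model, tokenizer, text, max_tokens=256):
    # Same two-stage pipeline as summarize(), but with a preloaded model;
    # reuses the LexRank preprocessing defined above
    sorted_text = extract_important_sentences(text)
    input_ids = tokenizer(sorted_text, return_tensors="pt", truncation=True).input_ids
    with torch.inference_mode():
        outputs = model.generate(input_ids=input_ids, max_new_tokens=max_tokens, do_sample=True, top_p=0.9)
    return tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]

model, tokenizer = load_summarizer()
summary = summarize_with(model, tokenizer, """ Add your textile patent text""")
```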
## Citation

```bibtex
@inproceedings{jayatilleke2025hybrid,
  title={A Hybrid Architecture with Efficient Fine Tuning for Abstractive Patent Document Summarization},
  author={Jayatilleke, Nevidu and Weerasinghe, Ruvan},
  booktitle={2025 International Research Conference on Smart Computing and Systems Engineering (SCSE)},
  pages={1--6},
  year={2025},
  organization={IEEE}
}
```

### Framework versions

- PEFT 0.9.0