import nlp from 'compromise'
import axios from 'axios'

/**
 * Replaces every `.`, `,`, `?`, `!` and `:` in the text with a single space.
 *
 * Each punctuation character becomes its own space, so runs of punctuation
 * (or punctuation next to an existing space) yield consecutive spaces.
 *
 * @param {string} text - Input text.
 * @returns {string} The text with the listed punctuation marks blanked out.
 */
function removePunctuationMarks(text){
    return text.replace(/[\.,?!:]/g, " ");
}
/**
 * Debug helper: parses the text with compromise and prints the "freq"
 * output of its `people()` matches to the console.
 *
 * @param {string} text - Text to inspect.
 * @returns {void}
 */
function getP(text){
    const peopleMatches = nlp(text).people();
    console.log(peopleMatches.out("freq"));
}

/**
 * Builds one record per distinct term of the text, with its lemma, tags,
 * frequency and a person-name flag.
 *
 * The lemma is the first non-empty value of: the term's verbs converted to
 * infinitive, the term's nouns converted to singular, the term's normalised
 * text. All dots are then stripped from it.
 *
 * @param {string} text - Text to analyse.
 * @returns {Array<{word: string, lemma: string, pos: *, count: number, isName: boolean}>}
 *   `word`/`count` come from the `normal`/`count` fields of compromise's
 *   "freq" output; `pos` is whatever `out('tags')` yields for the term.
 */
function getOriginalTerms(text){
    const termFrequencies = nlp(text).terms().out("freq");

    return termFrequencies.map(({ normal, count }) => {
        const parsedTerm = nlp(normal);

        // Fallback order: verb infinitive, then singular noun, then the term as-is.
        let lemma = parsedTerm.verbs().toInfinitive().out();
        if (!lemma) {
            lemma = parsedTerm.nouns().toSingular().out();
        }
        if (!lemma) {
            lemma = normal;
        }

        // Many terms end with a dot; remove every dot from the lemma.
        lemma = lemma.replace(/[\.]/g, '');

        // The lemma counts as a person name when compromise finds a person in it.
        const isName = nlp(lemma).people().out() != '';

        // Tags are read after the conversions above.
        // NOTE(review): if toInfinitive()/toSingular() mutate the term in place,
        // these tags describe the converted form - confirm for the compromise version in use.
        const pos = parsedTerm.out('tags');

        return { word: normal, lemma, pos, count, isName };
    });
}

/**
 * Downloads the word-level CSV and converts it into an array of row objects.
 *
 * The first non-empty line is taken as the header; every later non-empty
 * line becomes one object keyed by the header names. Cells are split on
 * plain commas (no quoting support). A row with fewer cells than the header
 * gets `undefined` for the missing columns; extra cells are ignored.
 *
 * @returns {Promise<Array<Object<string, string>>|undefined>} The parsed
 *   rows (empty array when the file has no data rows), or `undefined` when
 *   the download or parsing fails - the error is logged, not rethrown.
 */
export async function readWordLevelCsv(){
    try {
        const response = await axios.get('/cefrj-vocabulary-profile-tpe_DS-Cathoven_不分詞性.csv');

        // trim() also removes the '\r' left behind by CRLF line endings.
        const rows = response.data
            .split('\n')
            .map((line) => line.trim())
            .filter((line) => line !== '')
            .map((line) => line.split(','));

        // The header is the first NON-EMPTY line, so a leading blank line
        // no longer causes every row to be parsed against an empty header.
        const [headerNames = [], ...records] = rows;

        return records.map((cells) => {
            const item = {};
            headerNames.forEach((name, index) => {
                item[name] = cells[index];
            });
            return item;
        });
    } catch (error) {
        console.error('Error loading CSV:', error);
        // Callers receive undefined on failure and must handle it.
        return undefined;
    }
}

/**
 * Tallies how many word occurrences of the text fall into each CEFR level.
 *
 * Every term returned by getOriginalTerms is looked up by its lemma among
 * the `headword` values of the rows returned by readWordLevelCsv:
 *  - found: its count is added to that row's `CEFR` level;
 *  - not found but flagged as a person name: its count is added to the
 *    pseudo-level `nInfinity`;
 *  - otherwise: the term is only logged as "not found".
 * The chosen level is also written onto the term object as `CEFR`.
 *
 * @param {string} text - Text to analyse.
 * @returns {Promise<{gradeData: Object<string, number>, removeList: string[]}>}
 *   `gradeData` maps level name to summed occurrence count (`A1` is always
 *   present). `removeList` holds the lemma of every term matched at a level
 *   other than `A1` and of every name-only term.
 * @throws {Error} When the word-level CSV rows are unavailable
 *   (readWordLevelCsv resolved to null/undefined).
 */
export async function calculateGrade(text){
    const originalTerms = getOriginalTerms(text);
    const wordGrade = await readWordLevelCsv();
    console.log("originalterms:", originalTerms);

    // readWordLevelCsv resolves to undefined when loading fails; fail with a
    // clear message instead of an opaque "not iterable" TypeError.
    if (wordGrade == null) {
        throw new Error('calculateGrade: word-level CSV rows are unavailable');
    }

    // headword -> CEFR level, built once. The first occurrence wins, which
    // matches a top-to-bottom scan of the rows.
    const levelByHeadword = new Map();
    for (const { headword, CEFR } of wordGrade) {
        if (!levelByHeadword.has(headword)) {
            levelByHeadword.set(headword, CEFR);
        }
    }

    const gradeData = { A1: 0 };
    const removeList = [];
    const addToLevel = (level, count) => {
        gradeData[level] = (gradeData[level] || 0) + count;
    };

    for (const term of originalTerms) {
        if (levelByHeadword.has(term.lemma)) {
            const level = levelByHeadword.get(term.lemma);
            term.CEFR = level;
            addToLevel(level, term.count);
            if (level !== 'A1') {
                removeList.push(term.lemma);
            }
        } else if (term.isName) {
            // Only reached once the whole CSV has been checked: the term is
            // not a known headword but is a person name.
            term.CEFR = 'nInfinity';
            addToLevel('nInfinity', term.count);
            removeList.push(term.lemma);
        } else {
            console.log('not found:', term);
        }
    }

    console.log(gradeData);
    return { gradeData, removeList };
}